github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/statmat.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package stat 6 7 import ( 8 "math" 9 10 "github.com/jingcheng-WU/gonum/floats" 11 "github.com/jingcheng-WU/gonum/mat" 12 ) 13 14 // CovarianceMatrix calculates the covariance matrix (also known as the 15 // variance-covariance matrix) calculated from a matrix of data, x, using 16 // a two-pass algorithm. The result is stored in dst. 17 // 18 // If weights is not nil the weighted covariance of x is calculated. weights 19 // must have length equal to the number of rows in input data matrix and 20 // must not contain negative elements. 21 // The dst matrix must either be empty or have the same number of 22 // columns as the input data matrix. 23 func CovarianceMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { 24 // This is the matrix version of the two-pass algorithm. It doesn't use the 25 // additional floating point error correction that the Covariance function uses 26 // to reduce the impact of rounding during centering. 27 28 r, c := x.Dims() 29 30 if dst.IsEmpty() { 31 *dst = *(dst.GrowSym(c).(*mat.SymDense)) 32 } else if n := dst.Symmetric(); n != c { 33 panic(mat.ErrShape) 34 } 35 36 var xt mat.Dense 37 xt.CloneFrom(x.T()) 38 // Subtract the mean of each of the columns. 39 for i := 0; i < c; i++ { 40 v := xt.RawRowView(i) 41 // This will panic with ErrShape if len(weights) != len(v), so 42 // we don't have to check the size later. 43 mean := Mean(v, weights) 44 floats.AddConst(-mean, v) 45 } 46 47 if weights == nil { 48 // Calculate the normalization factor 49 // scaled by the sample size. 50 dst.SymOuterK(1/(float64(r)-1), &xt) 51 return 52 } 53 54 // Multiply by the sqrt of the weights, so that multiplication is symmetric. 55 sqrtwts := make([]float64, r) 56 for i, w := range weights { 57 if w < 0 { 58 panic("stat: negative covariance matrix weights") 59 } 60 sqrtwts[i] = math.Sqrt(w) 61 } 62 // Weight the rows. 63 for i := 0; i < c; i++ { 64 v := xt.RawRowView(i) 65 floats.Mul(v, sqrtwts) 66 } 67 68 // Calculate the normalization factor 69 // scaled by the weighted sample size. 70 dst.SymOuterK(1/(floats.Sum(weights)-1), &xt) 71 } 72 73 // CorrelationMatrix returns the correlation matrix calculated from a matrix 74 // of data, x, using a two-pass algorithm. The result is stored in dst. 75 // 76 // If weights is not nil the weighted correlation of x is calculated. weights 77 // must have length equal to the number of rows in input data matrix and 78 // must not contain negative elements. 79 // The dst matrix must either be empty or have the same number of 80 // columns as the input data matrix. 81 func CorrelationMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) { 82 // This will panic if the sizes don't match, or if weights is the wrong size. 83 CovarianceMatrix(dst, x, weights) 84 covToCorr(dst) 85 } 86 87 // covToCorr converts a covariance matrix to a correlation matrix. 88 func covToCorr(c *mat.SymDense) { 89 r := c.Symmetric() 90 91 s := make([]float64, r) 92 for i := 0; i < r; i++ { 93 s[i] = 1 / math.Sqrt(c.At(i, i)) 94 } 95 for i, sx := range s { 96 // Ensure that the diagonal has exactly ones. 97 c.SetSym(i, i, 1) 98 for j := i + 1; j < r; j++ { 99 v := c.At(i, j) 100 c.SetSym(i, j, v*sx*s[j]) 101 } 102 } 103 } 104 105 // corrToCov converts a correlation matrix to a covariance matrix. 106 // The input sigma should be vector of standard deviations corresponding 107 // to the covariance. It will panic if len(sigma) is not equal to the 108 // number of rows in the correlation matrix. 109 func corrToCov(c *mat.SymDense, sigma []float64) { 110 r, _ := c.Dims() 111 112 if r != len(sigma) { 113 panic(mat.ErrShape) 114 } 115 for i, sx := range sigma { 116 // Ensure that the diagonal has exactly sigma squared. 117 c.SetSym(i, i, sx*sx) 118 for j := i + 1; j < r; j++ { 119 v := c.At(i, j) 120 c.SetSym(i, j, v*sx*sigma[j]) 121 } 122 } 123 } 124 125 // Mahalanobis computes the Mahalanobis distance 126 // D = sqrt((x-y)ᵀ * Σ^-1 * (x-y)) 127 // between the column vectors x and y given the cholesky decomposition of Σ. 128 // Mahalanobis returns NaN if the linear solve fails. 129 // 130 // See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information. 131 func Mahalanobis(x, y mat.Vector, chol *mat.Cholesky) float64 { 132 var diff mat.VecDense 133 diff.SubVec(x, y) 134 var tmp mat.VecDense 135 err := chol.SolveVecTo(&tmp, &diff) 136 if err != nil { 137 return math.NaN() 138 } 139 return math.Sqrt(mat.Dot(&tmp, &diff)) 140 }