github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/pca_test.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package stat 6 7 import ( 8 "math" 9 "testing" 10 11 "github.com/jingcheng-WU/gonum/floats/scalar" 12 "github.com/jingcheng-WU/gonum/mat" 13 ) 14 15 func TestPrincipalComponents(t *testing.T) { 16 // Threshold for detecting zero variances. 17 const epsilon = 1e-15 18 tests: 19 for i, test := range []struct { 20 data mat.Matrix 21 weights []float64 22 wantVecs *mat.Dense 23 wantVars []float64 24 epsilon float64 25 }{ 26 // Test results verified using R. 27 { 28 data: mat.NewDense(3, 3, []float64{ 29 1, 2, 3, 30 4, 5, 6, 31 7, 8, 9, 32 }), 33 wantVecs: mat.NewDense(3, 3, []float64{ 34 0.5773502691896258, 0.8164965809277261, 0, 35 0.577350269189626, -0.4082482904638632, -0.7071067811865476, 36 0.5773502691896258, -0.4082482904638631, 0.7071067811865475, 37 }), 38 wantVars: []float64{27, 0, 0}, 39 epsilon: 1e-12, 40 }, 41 { // Truncated iris data. 42 data: mat.NewDense(10, 4, []float64{ 43 5.1, 3.5, 1.4, 0.2, 44 4.9, 3.0, 1.4, 0.2, 45 4.7, 3.2, 1.3, 0.2, 46 4.6, 3.1, 1.5, 0.2, 47 5.0, 3.6, 1.4, 0.2, 48 5.4, 3.9, 1.7, 0.4, 49 4.6, 3.4, 1.4, 0.3, 50 5.0, 3.4, 1.5, 0.2, 51 4.4, 2.9, 1.4, 0.2, 52 4.9, 3.1, 1.5, 0.1, 53 }), 54 wantVecs: mat.NewDense(4, 4, []float64{ 55 -0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122, 56 -0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923, 57 -0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735, 58 -0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032, 59 }), 60 wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329}, 61 epsilon: 1e-12, 62 }, 63 { // Truncated iris data to form wide matrix. 64 data: mat.NewDense(3, 4, []float64{ 65 5.1, 3.5, 1.4, 0.2, 66 4.9, 3.0, 1.4, 0.2, 67 4.7, 3.2, 1.3, 0.2, 68 }), 69 wantVecs: mat.NewDense(4, 3, []float64{ 70 -0.5705187254552365, -0.7505979435049239, 0.08084520834544455, 71 -0.8166537769529318, 0.5615147645527523, -0.032338083338177705, 72 -0.08709186238359454, -0.3482870890450082, -0.22636658336724505, 73 0, 0, -0.9701425001453315, 74 }), 75 wantVars: []float64{0.0844692361537822, 0.022197430512884326, 0}, 76 epsilon: 1e-12, 77 }, 78 { // Truncated iris data transposed to check for operation on fat input. 79 data: mat.NewDense(10, 4, []float64{ 80 5.1, 3.5, 1.4, 0.2, 81 4.9, 3.0, 1.4, 0.2, 82 4.7, 3.2, 1.3, 0.2, 83 4.6, 3.1, 1.5, 0.2, 84 5.0, 3.6, 1.4, 0.2, 85 5.4, 3.9, 1.7, 0.4, 86 4.6, 3.4, 1.4, 0.3, 87 5.0, 3.4, 1.5, 0.2, 88 4.4, 2.9, 1.4, 0.2, 89 4.9, 3.1, 1.5, 0.1, 90 }).T(), 91 wantVecs: mat.NewDense(10, 4, []float64{ 92 -0.3366602459946619, -0.1373634006401213, 0.3465102523547623, -0.10290179303893479, 93 -0.31381852053861975, 0.5197145790632827, 0.5567296129086686, -0.15923062170153618, 94 -0.30857197637565165, -0.07670930360819002, 0.36159923003337235, 0.3342301027853355, 95 -0.29527124351656137, 0.16885455995353074, -0.5056204762881208, 0.32580913261444344, 96 -0.3327611073694004, -0.39365834489416474, 0.04900050959307464, 0.46812879383236555, 97 -0.34445484362044815, -0.2985206914561878, -0.1009714701361799, -0.16803618186050803, 98 -0.2986246350957691, -0.4222037823717799, -0.11838613462182519, -0.580283530375069, 99 -0.325911246223126, 0.024366468758217238, -0.12082035131864265, 0.16756027181337868, 100 -0.2814284432361538, 0.240812316260054, -0.24061437569068145, -0.365034616264623, 101 -0.31906138507685167, 0.4423912824105986, -0.2906412122303604, 0.027551046870337714, 102 }), 103 wantVars: []float64{41.8851906634233, 0.07762619213464989, 0.010516477775373585, 0}, 104 epsilon: 1e-12, 105 }, 106 { // Truncated iris data unitary weights. 107 data: mat.NewDense(10, 4, []float64{ 108 5.1, 3.5, 1.4, 0.2, 109 4.9, 3.0, 1.4, 0.2, 110 4.7, 3.2, 1.3, 0.2, 111 4.6, 3.1, 1.5, 0.2, 112 5.0, 3.6, 1.4, 0.2, 113 5.4, 3.9, 1.7, 0.4, 114 4.6, 3.4, 1.4, 0.3, 115 5.0, 3.4, 1.5, 0.2, 116 4.4, 2.9, 1.4, 0.2, 117 4.9, 3.1, 1.5, 0.1, 118 }), 119 weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 120 wantVecs: mat.NewDense(4, 4, []float64{ 121 -0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122, 122 -0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923, 123 -0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735, 124 -0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032, 125 }), 126 wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329}, 127 epsilon: 1e-12, 128 }, 129 { // Truncated iris data non-unitary weights. 130 data: mat.NewDense(10, 4, []float64{ 131 5.1, 3.5, 1.4, 0.2, 132 4.9, 3.0, 1.4, 0.2, 133 4.7, 3.2, 1.3, 0.2, 134 4.6, 3.1, 1.5, 0.2, 135 5.0, 3.6, 1.4, 0.2, 136 5.4, 3.9, 1.7, 0.4, 137 4.6, 3.4, 1.4, 0.3, 138 5.0, 3.4, 1.5, 0.2, 139 4.4, 2.9, 1.4, 0.2, 140 4.9, 3.1, 1.5, 0.1, 141 }), 142 weights: []float64{2, 3, 1, 1, 1, 1, 1, 1, 1, 2}, 143 wantVecs: mat.NewDense(4, 4, []float64{ 144 -0.618936145422414, 0.763069301531647, 0.124857741232537, 0.138035623677211, 145 -0.763958271606519, -0.603881770702898, 0.118267155321333, -0.194184052457746, 146 -0.143552119754944, 0.090014599564871, -0.942209377020044, -0.289018426115945, 147 -0.112599271966947, -0.212012782487076, -0.287515067921680, 0.927203898682805, 148 }), 149 wantVars: []float64{0.129621985550623, 0.022417487771598, 0.006454461065715, 0.002495076601075}, 150 epsilon: 1e-12, 151 }, 152 } { 153 var pc PC 154 vecs := &mat.Dense{} 155 var vars []float64 156 for j := 0; j < 2; j++ { 157 ok := pc.PrincipalComponents(test.data, test.weights) 158 pc.VectorsTo(vecs) 159 vars = pc.VarsTo(vars) 160 if !ok { 161 t.Errorf("unexpected SVD failure for test %d use %d", i, j) 162 continue tests 163 } 164 165 // Find the number of non-zero variances to handle 166 // non-uniqueness in SVD result (issue #21). 167 nnz := len(vars) 168 for k, v := range vars { 169 if math.Abs(v) < epsilon { 170 nnz = k 171 break 172 } 173 } 174 r, c := vecs.Dims() 175 if !mat.EqualApprox(vecs.Slice(0, r, 0, nnz), test.wantVecs.Slice(0, r, 0, nnz), test.epsilon) { 176 t.Errorf("%d use %d: unexpected PCA result got:\n%v\nwant:\n%v", 177 i, j, mat.Formatted(vecs), mat.Formatted(test.wantVecs)) 178 } 179 if !approxEqual(vars, test.wantVars, test.epsilon) { 180 t.Errorf("%d use %d: unexpected variance result got:%v, want:%v", 181 i, j, vars, test.wantVars) 182 } 183 184 // Check that the set of principal vectors is 185 // orthonormal by comparing Vᵀ*V to the identity matrix. 186 I := mat.NewDiagDense(c, nil) 187 for k := 0; k < c; k++ { 188 I.SetDiag(k, 1) 189 } 190 var vv mat.Dense 191 vv.Mul(vecs.T(), vecs) 192 if !mat.EqualApprox(&vv, I, test.epsilon) { 193 t.Errorf("%d use %d: vectors not orthonormal\n%v", i, j, mat.Formatted(I)) 194 } 195 } 196 } 197 } 198 199 func approxEqual(a, b []float64, epsilon float64) bool { 200 if len(a) != len(b) { 201 return false 202 } 203 for i, v := range a { 204 if !scalar.EqualWithinAbsOrRel(v, b[i], epsilon, epsilon) { 205 return false 206 } 207 } 208 return true 209 }