github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/pca_test.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package stat
     6  
     7  import (
     8  	"math"
     9  	"testing"
    10  
    11  	"github.com/jingcheng-WU/gonum/floats/scalar"
    12  	"github.com/jingcheng-WU/gonum/mat"
    13  )
    14  
    15  func TestPrincipalComponents(t *testing.T) {
    16  	// Threshold for detecting zero variances.
    17  	const epsilon = 1e-15
    18  tests:
    19  	for i, test := range []struct {
    20  		data     mat.Matrix
    21  		weights  []float64
    22  		wantVecs *mat.Dense
    23  		wantVars []float64
    24  		epsilon  float64
    25  	}{
    26  		// Test results verified using R.
    27  		{
    28  			data: mat.NewDense(3, 3, []float64{
    29  				1, 2, 3,
    30  				4, 5, 6,
    31  				7, 8, 9,
    32  			}),
    33  			wantVecs: mat.NewDense(3, 3, []float64{
    34  				0.5773502691896258, 0.8164965809277261, 0,
    35  				0.577350269189626, -0.4082482904638632, -0.7071067811865476,
    36  				0.5773502691896258, -0.4082482904638631, 0.7071067811865475,
    37  			}),
    38  			wantVars: []float64{27, 0, 0},
    39  			epsilon:  1e-12,
    40  		},
    41  		{ // Truncated iris data.
    42  			data: mat.NewDense(10, 4, []float64{
    43  				5.1, 3.5, 1.4, 0.2,
    44  				4.9, 3.0, 1.4, 0.2,
    45  				4.7, 3.2, 1.3, 0.2,
    46  				4.6, 3.1, 1.5, 0.2,
    47  				5.0, 3.6, 1.4, 0.2,
    48  				5.4, 3.9, 1.7, 0.4,
    49  				4.6, 3.4, 1.4, 0.3,
    50  				5.0, 3.4, 1.5, 0.2,
    51  				4.4, 2.9, 1.4, 0.2,
    52  				4.9, 3.1, 1.5, 0.1,
    53  			}),
    54  			wantVecs: mat.NewDense(4, 4, []float64{
    55  				-0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122,
    56  				-0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923,
    57  				-0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735,
    58  				-0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032,
    59  			}),
    60  			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
    61  			epsilon:  1e-12,
    62  		},
    63  		{ // Truncated iris data to form wide matrix.
    64  			data: mat.NewDense(3, 4, []float64{
    65  				5.1, 3.5, 1.4, 0.2,
    66  				4.9, 3.0, 1.4, 0.2,
    67  				4.7, 3.2, 1.3, 0.2,
    68  			}),
    69  			wantVecs: mat.NewDense(4, 3, []float64{
    70  				-0.5705187254552365, -0.7505979435049239, 0.08084520834544455,
    71  				-0.8166537769529318, 0.5615147645527523, -0.032338083338177705,
    72  				-0.08709186238359454, -0.3482870890450082, -0.22636658336724505,
    73  				0, 0, -0.9701425001453315,
    74  			}),
    75  			wantVars: []float64{0.0844692361537822, 0.022197430512884326, 0},
    76  			epsilon:  1e-12,
    77  		},
    78  		{ // Truncated iris data transposed to check for operation on fat input.
    79  			data: mat.NewDense(10, 4, []float64{
    80  				5.1, 3.5, 1.4, 0.2,
    81  				4.9, 3.0, 1.4, 0.2,
    82  				4.7, 3.2, 1.3, 0.2,
    83  				4.6, 3.1, 1.5, 0.2,
    84  				5.0, 3.6, 1.4, 0.2,
    85  				5.4, 3.9, 1.7, 0.4,
    86  				4.6, 3.4, 1.4, 0.3,
    87  				5.0, 3.4, 1.5, 0.2,
    88  				4.4, 2.9, 1.4, 0.2,
    89  				4.9, 3.1, 1.5, 0.1,
    90  			}).T(),
    91  			wantVecs: mat.NewDense(10, 4, []float64{
    92  				-0.3366602459946619, -0.1373634006401213, 0.3465102523547623, -0.10290179303893479,
    93  				-0.31381852053861975, 0.5197145790632827, 0.5567296129086686, -0.15923062170153618,
    94  				-0.30857197637565165, -0.07670930360819002, 0.36159923003337235, 0.3342301027853355,
    95  				-0.29527124351656137, 0.16885455995353074, -0.5056204762881208, 0.32580913261444344,
    96  				-0.3327611073694004, -0.39365834489416474, 0.04900050959307464, 0.46812879383236555,
    97  				-0.34445484362044815, -0.2985206914561878, -0.1009714701361799, -0.16803618186050803,
    98  				-0.2986246350957691, -0.4222037823717799, -0.11838613462182519, -0.580283530375069,
    99  				-0.325911246223126, 0.024366468758217238, -0.12082035131864265, 0.16756027181337868,
   100  				-0.2814284432361538, 0.240812316260054, -0.24061437569068145, -0.365034616264623,
   101  				-0.31906138507685167, 0.4423912824105986, -0.2906412122303604, 0.027551046870337714,
   102  			}),
   103  			wantVars: []float64{41.8851906634233, 0.07762619213464989, 0.010516477775373585, 0},
   104  			epsilon:  1e-12,
   105  		},
   106  		{ // Truncated iris data unitary weights.
   107  			data: mat.NewDense(10, 4, []float64{
   108  				5.1, 3.5, 1.4, 0.2,
   109  				4.9, 3.0, 1.4, 0.2,
   110  				4.7, 3.2, 1.3, 0.2,
   111  				4.6, 3.1, 1.5, 0.2,
   112  				5.0, 3.6, 1.4, 0.2,
   113  				5.4, 3.9, 1.7, 0.4,
   114  				4.6, 3.4, 1.4, 0.3,
   115  				5.0, 3.4, 1.5, 0.2,
   116  				4.4, 2.9, 1.4, 0.2,
   117  				4.9, 3.1, 1.5, 0.1,
   118  			}),
   119  			weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
   120  			wantVecs: mat.NewDense(4, 4, []float64{
   121  				-0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122,
   122  				-0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923,
   123  				-0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735,
   124  				-0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032,
   125  			}),
   126  			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
   127  			epsilon:  1e-12,
   128  		},
   129  		{ // Truncated iris data non-unitary weights.
   130  			data: mat.NewDense(10, 4, []float64{
   131  				5.1, 3.5, 1.4, 0.2,
   132  				4.9, 3.0, 1.4, 0.2,
   133  				4.7, 3.2, 1.3, 0.2,
   134  				4.6, 3.1, 1.5, 0.2,
   135  				5.0, 3.6, 1.4, 0.2,
   136  				5.4, 3.9, 1.7, 0.4,
   137  				4.6, 3.4, 1.4, 0.3,
   138  				5.0, 3.4, 1.5, 0.2,
   139  				4.4, 2.9, 1.4, 0.2,
   140  				4.9, 3.1, 1.5, 0.1,
   141  			}),
   142  			weights: []float64{2, 3, 1, 1, 1, 1, 1, 1, 1, 2},
   143  			wantVecs: mat.NewDense(4, 4, []float64{
   144  				-0.618936145422414, 0.763069301531647, 0.124857741232537, 0.138035623677211,
   145  				-0.763958271606519, -0.603881770702898, 0.118267155321333, -0.194184052457746,
   146  				-0.143552119754944, 0.090014599564871, -0.942209377020044, -0.289018426115945,
   147  				-0.112599271966947, -0.212012782487076, -0.287515067921680, 0.927203898682805,
   148  			}),
   149  			wantVars: []float64{0.129621985550623, 0.022417487771598, 0.006454461065715, 0.002495076601075},
   150  			epsilon:  1e-12,
   151  		},
   152  	} {
   153  		var pc PC
   154  		vecs := &mat.Dense{}
   155  		var vars []float64
   156  		for j := 0; j < 2; j++ {
   157  			ok := pc.PrincipalComponents(test.data, test.weights)
   158  			pc.VectorsTo(vecs)
   159  			vars = pc.VarsTo(vars)
   160  			if !ok {
   161  				t.Errorf("unexpected SVD failure for test %d use %d", i, j)
   162  				continue tests
   163  			}
   164  
   165  			// Find the number of non-zero variances to handle
   166  			// non-uniqueness in SVD result (issue #21).
   167  			nnz := len(vars)
   168  			for k, v := range vars {
   169  				if math.Abs(v) < epsilon {
   170  					nnz = k
   171  					break
   172  				}
   173  			}
   174  			r, c := vecs.Dims()
   175  			if !mat.EqualApprox(vecs.Slice(0, r, 0, nnz), test.wantVecs.Slice(0, r, 0, nnz), test.epsilon) {
   176  				t.Errorf("%d use %d: unexpected PCA result got:\n%v\nwant:\n%v",
   177  					i, j, mat.Formatted(vecs), mat.Formatted(test.wantVecs))
   178  			}
   179  			if !approxEqual(vars, test.wantVars, test.epsilon) {
   180  				t.Errorf("%d use %d: unexpected variance result got:%v, want:%v",
   181  					i, j, vars, test.wantVars)
   182  			}
   183  
   184  			// Check that the set of principal vectors is
   185  			// orthonormal by comparing Vᵀ*V to the identity matrix.
   186  			I := mat.NewDiagDense(c, nil)
   187  			for k := 0; k < c; k++ {
   188  				I.SetDiag(k, 1)
   189  			}
   190  			var vv mat.Dense
   191  			vv.Mul(vecs.T(), vecs)
   192  			if !mat.EqualApprox(&vv, I, test.epsilon) {
   193  				t.Errorf("%d use %d: vectors not orthonormal\n%v", i, j, mat.Formatted(I))
   194  			}
   195  		}
   196  	}
   197  }
   198  
   199  func approxEqual(a, b []float64, epsilon float64) bool {
   200  	if len(a) != len(b) {
   201  		return false
   202  	}
   203  	for i, v := range a {
   204  		if !scalar.EqualWithinAbsOrRel(v, b[i], epsilon, epsilon) {
   205  			return false
   206  		}
   207  	}
   208  	return true
   209  }