gonum.org/v1/gonum@v0.14.0/stat/cca_example_test.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package stat_test
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  
    11  	"gonum.org/v1/gonum/floats"
    12  	"gonum.org/v1/gonum/mat"
    13  	"gonum.org/v1/gonum/stat"
    14  )
    15  
    16  // symView is a helper for getting a View of a SymDense.
    17  type symView struct {
    18  	sym *mat.SymDense
    19  
    20  	i, j, r, c int
    21  }
    22  
    23  func (s symView) Dims() (r, c int) { return s.r, s.c }
    24  
    25  func (s symView) At(i, j int) float64 {
    26  	if i < 0 || s.r <= i {
    27  		panic("i out of bounds")
    28  	}
    29  	if j < 0 || s.c <= j {
    30  		panic("j out of bounds")
    31  	}
    32  	return s.sym.At(s.i+i, s.j+j)
    33  }
    34  
    35  func (s symView) T() mat.Matrix { return mat.Transpose{Matrix: s} }
    36  
    37  func ExampleCC() {
    38  	// This example is directly analogous to Example 3.5 on page 87 of
    39  	// Koch, Inge. Analysis of multivariate and high-dimensional data.
    40  	// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939
    41  
    42  	// bostonData is the Boston Housing Data of Harrison and Rubinfeld (1978)
    43  	n, _ := bostonData.Dims()
    44  	var xd, yd = 7, 4
    45  	// The variables (columns) of bostonData can be partitioned into two sets:
    46  	// those that deal with environmental/social variables (xdata), and those
    47  	// that contain information regarding the individual (ydata). Because the
    48  	// variables can be naturally partitioned in this way, these data are
    49  	// appropriate for canonical correlation analysis. The columns (variables)
    50  	// of xdata are, in order:
    51  	//  per capita crime rate by town,
    52  	//  proportion of non-retail business acres per town,
    53  	//  nitric oxide concentration (parts per 10 million),
    54  	//  weighted distances to Boston employment centres,
    55  	//  index of accessibility to radial highways,
    56  	//  pupil-teacher ratio by town, and
    57  	//  proportion of blacks by town.
    58  	xdata := bostonData.Slice(0, n, 0, xd)
    59  
    60  	// The columns (variables) of ydata are, in order:
    61  	//  average number of rooms per dwelling,
    62  	//  proportion of owner-occupied units built prior to 1940,
    63  	//  full-value property-tax rate per $10000, and
    64  	//  median value of owner-occupied homes in $1000s.
    65  	ydata := bostonData.Slice(0, n, xd, xd+yd)
    66  
    67  	// For comparison, calculate the correlation matrix for the original data.
    68  	var cor mat.SymDense
    69  	stat.CorrelationMatrix(&cor, bostonData, nil)
    70  
    71  	// Extract just those correlations that are between xdata and ydata.
    72  	var corRaw = symView{sym: &cor, i: 0, j: xd, r: xd, c: yd}
    73  
    74  	// Note that the strongest correlation between individual variables is 0.91
    75  	// between the 5th variable of xdata (index of accessibility to radial
    76  	// highways) and the 3rd variable of ydata (full-value property-tax rate per
    77  	// $10000).
    78  	fmt.Printf("corRaw = %.4f", mat.Formatted(corRaw, mat.Prefix("         ")))
    79  
    80  	// Calculate the canonical correlations.
    81  	var cc stat.CC
    82  	err := cc.CanonicalCorrelations(xdata, ydata, nil)
    83  	if err != nil {
    84  		log.Fatal(err)
    85  	}
    86  
    87  	// Unpack cc.
    88  	var pVecs, qVecs, phiVs, psiVs mat.Dense
    89  	ccors := cc.CorrsTo(nil)
    90  	cc.LeftTo(&pVecs, true)
    91  	cc.RightTo(&qVecs, true)
    92  	cc.LeftTo(&phiVs, false)
    93  	cc.RightTo(&psiVs, false)
    94  
    95  	// Canonical Correlation Matrix, or the correlations between the sphered
    96  	// data.
    97  	var corSph mat.Dense
    98  	corSph.CloneFrom(&pVecs)
    99  	col := make([]float64, xd)
   100  	for j := 0; j < yd; j++ {
   101  		mat.Col(col, j, &corSph)
   102  		floats.Scale(ccors[j], col)
   103  		corSph.SetCol(j, col)
   104  	}
   105  	corSph.Product(&corSph, qVecs.T())
   106  	fmt.Printf("\n\ncorSph = %.4f", mat.Formatted(&corSph, mat.Prefix("         ")))
   107  
   108  	// Canonical Correlations. Note that the first canonical correlation is
   109  	// 0.95, stronger than the greatest correlation in the original data, and
   110  	// much stronger than the greatest correlation in the sphered data.
   111  	fmt.Printf("\n\nccors = %.4f", ccors)
   112  
   113  	// Left and right eigenvectors of the canonical correlation matrix.
   114  	fmt.Printf("\n\npVecs = %.4f", mat.Formatted(&pVecs, mat.Prefix("        ")))
   115  	fmt.Printf("\n\nqVecs = %.4f", mat.Formatted(&qVecs, mat.Prefix("        ")))
   116  
   117  	// Canonical Correlation Transforms. These can be useful as they represent
   118  	// the canonical variables as linear combinations of the original variables.
   119  	fmt.Printf("\n\nphiVs = %.4f", mat.Formatted(&phiVs, mat.Prefix("        ")))
   120  	fmt.Printf("\n\npsiVs = %.4f", mat.Formatted(&psiVs, mat.Prefix("        ")))
   121  
   122  	// Output:
   123  	// corRaw = ⎡-0.2192   0.3527   0.5828  -0.3883⎤
   124  	//          ⎢-0.3917   0.6448   0.7208  -0.4837⎥
   125  	//          ⎢-0.3022   0.7315   0.6680  -0.4273⎥
   126  	//          ⎢ 0.2052  -0.7479  -0.5344   0.2499⎥
   127  	//          ⎢-0.2098   0.4560   0.9102  -0.3816⎥
   128  	//          ⎢-0.3555   0.2615   0.4609  -0.5078⎥
   129  	//          ⎣ 0.1281  -0.2735  -0.4418   0.3335⎦
   130  	//
   131  	// corSph = ⎡ 0.0118   0.0525   0.2300  -0.1363⎤
   132  	//          ⎢-0.1810   0.3213   0.3814  -0.1412⎥
   133  	//          ⎢ 0.0166   0.2241   0.0104  -0.2235⎥
   134  	//          ⎢ 0.0346  -0.5481  -0.0034  -0.1994⎥
   135  	//          ⎢ 0.0303  -0.0956   0.7152   0.2039⎥
   136  	//          ⎢-0.0298  -0.0022   0.0739  -0.3703⎥
   137  	//          ⎣-0.1226  -0.0746  -0.3899   0.1541⎦
   138  	//
   139  	// ccors = [0.9451 0.6787 0.5714 0.2010]
   140  	//
   141  	// pVecs = ⎡-0.2574  -0.0158  -0.2122  -0.0946⎤
   142  	//         ⎢-0.4837  -0.3837  -0.1474   0.6597⎥
   143  	//         ⎢-0.0801  -0.3494  -0.3287  -0.2862⎥
   144  	//         ⎢ 0.1278   0.7337  -0.4851   0.2248⎥
   145  	//         ⎢-0.6969   0.4342   0.3603   0.0291⎥
   146  	//         ⎢-0.0991  -0.0503  -0.6384   0.1022⎥
   147  	//         ⎣ 0.4260  -0.0323   0.2290   0.6419⎦
   148  	//
   149  	// qVecs = ⎡ 0.0182   0.1583   0.0067  -0.9872⎤
   150  	//         ⎢-0.2348  -0.9483   0.1462  -0.1554⎥
   151  	//         ⎢-0.9701   0.2406   0.0252   0.0209⎥
   152  	//         ⎣ 0.0593   0.1330   0.9889   0.0291⎦
   153  	//
   154  	// phiVs = ⎡-0.0027  -0.0093  -0.0490  -0.0155⎤
   155  	//         ⎢-0.0429   0.0242  -0.0361   0.1839⎥
   156  	//         ⎢-1.2248  -5.6031  -5.8094  -4.7927⎥
   157  	//         ⎢-0.0044   0.3424  -0.4470   0.1150⎥
   158  	//         ⎢-0.0742   0.1193   0.1116   0.0022⎥
   159  	//         ⎢-0.0233  -0.1046  -0.3853  -0.0161⎥
   160  	//         ⎣ 0.0001  -0.0005   0.0030   0.0082⎦
   161  	//
   162  	// psiVs = ⎡ 0.0302   0.3002  -0.0878  -1.9583⎤
   163  	//         ⎢-0.0065  -0.0392   0.0118  -0.0061⎥
   164  	//         ⎢-0.0052   0.0046   0.0023   0.0008⎥
   165  	//         ⎣ 0.0020  -0.0037   0.1293   0.1038⎦
   166  }