vitess.io/vitess@v0.16.2/go/mathstats/ttest.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mathstats 6 7 import ( 8 "errors" 9 "math" 10 ) 11 12 // A LocationHypothesis specifies the alternative hypothesis of a 13 // location test such as a t-test or a Mann-Whitney U-test. The 14 // default (zero) value is to test against the alternative hypothesis 15 // that they differ. 16 type LocationHypothesis int 17 18 const ( 19 // LocationLess specifies the alternative hypothesis that the 20 // location of the first sample is less than the second. This 21 // is a one-tailed test. 22 LocationLess LocationHypothesis = -1 23 24 // LocationDiffers specifies the alternative hypothesis that 25 // the locations of the two samples are not equal. This is a 26 // two-tailed test. 27 LocationDiffers LocationHypothesis = 0 28 29 // LocationGreater specifies the alternative hypothesis that 30 // the location of the first sample is greater than the 31 // second. This is a one-tailed test. 32 LocationGreater LocationHypothesis = 1 33 ) 34 35 // A TTestResult is the result of a t-test. 36 type TTestResult struct { 37 // N1 and N2 are the sizes of the input samples. For a 38 // one-sample t-test, N2 is 0. 39 N1, N2 int 40 41 // T is the value of the t-statistic for this t-test. 42 T float64 43 44 // DoF is the degrees of freedom for this t-test. 45 DoF float64 46 47 // AltHypothesis specifies the alternative hypothesis tested 48 // by this test against the null hypothesis that there is no 49 // difference in the means of the samples. 50 AltHypothesis LocationHypothesis 51 52 // P is p-value for this t-test for the given null hypothesis. 53 P float64 54 } 55 56 func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult { 57 dist := TDist{dof} 58 var p float64 59 switch alt { 60 case LocationDiffers: 61 p = 2 * (1 - dist.CDF(math.Abs(t))) 62 case LocationLess: 63 p = dist.CDF(t) 64 case LocationGreater: 65 p = 1 - dist.CDF(t) 66 } 67 return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p} 68 } 69 70 // A TTestSample is a sample that can be used for a one or two sample 71 // t-test. 72 type TTestSample interface { 73 Weight() float64 74 Mean() float64 75 Variance() float64 76 } 77 78 var ( 79 ErrSampleSize = errors.New("sample is too small") 80 ErrZeroVariance = errors.New("sample has zero variance") 81 ErrMismatchedSamples = errors.New("samples have different lengths") 82 ) 83 84 // TwoSampleTTest performs a two-sample (unpaired) Student's t-test on 85 // samples x1 and x2. This is a test of the null hypothesis that x1 86 // and x2 are drawn from populations with equal means. It assumes x1 87 // and x2 are independent samples, that the distributions have equal 88 // variance, and that the populations are normally distributed. 89 func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 90 n1, n2 := x1.Weight(), x2.Weight() 91 if n1 == 0 || n2 == 0 { 92 return nil, ErrSampleSize 93 } 94 v1, v2 := x1.Variance(), x2.Variance() 95 if v1 == 0 && v2 == 0 { 96 return nil, ErrZeroVariance 97 } 98 99 dof := n1 + n2 - 2 100 v12 := ((n1-1)*v1 + (n2-1)*v2) / dof 101 t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2)) 102 return newTTestResult(int(n1), int(n2), t, dof, alt), nil 103 } 104 105 // TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test 106 // on samples x1 and x2. This is like TwoSampleTTest, but does not 107 // assume the distributions have equal variance. 108 func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) { 109 n1, n2 := x1.Weight(), x2.Weight() 110 if n1 <= 1 || n2 <= 1 { 111 // TODO: Can we still do this with n == 1? 112 return nil, ErrSampleSize 113 } 114 v1, v2 := x1.Variance(), x2.Variance() 115 if v1 == 0 && v2 == 0 { 116 return nil, ErrZeroVariance 117 } 118 119 dof := math.Pow(v1/n1+v2/n2, 2) / 120 (math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1)) 121 s := math.Sqrt(v1/n1 + v2/n2) 122 t := (x1.Mean() - x2.Mean()) / s 123 return newTTestResult(int(n1), int(n2), t, dof, alt), nil 124 } 125 126 // PairedTTest performs a two-sample paired t-test on samples x1 and 127 // x2. If μ0 is non-zero, this tests if the average of the difference 128 // is significantly different from μ0. If x1 and x2 are identical, 129 // this returns nil. 130 func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 131 if len(x1) != len(x2) { 132 return nil, ErrMismatchedSamples 133 } 134 if len(x1) <= 1 { 135 // TODO: Can we still do this with n == 1? 136 return nil, ErrSampleSize 137 } 138 139 dof := float64(len(x1) - 1) 140 141 diff := make([]float64, len(x1)) 142 for i := range x1 { 143 diff[i] = x1[i] - x2[i] 144 } 145 sd := StdDev(diff) 146 if sd == 0 { 147 // TODO: Can we still do the test? 148 return nil, ErrZeroVariance 149 } 150 t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd 151 return newTTestResult(len(x1), len(x2), t, dof, alt), nil 152 } 153 154 // OneSampleTTest performs a one-sample t-test on sample x. This tests 155 // the null hypothesis that the population mean is equal to μ0. This 156 // assumes the distribution of the population of sample means is 157 // normal. 158 func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) { 159 n, v := x.Weight(), x.Variance() 160 if n == 0 { 161 return nil, ErrSampleSize 162 } 163 if v == 0 { 164 // TODO: Can we still do the test? 165 return nil, ErrZeroVariance 166 } 167 dof := n - 1 168 t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v) 169 return newTTestResult(int(n), 0, t, dof, alt), nil 170 }