vitess.io/vitess@v0.16.2/go/mathstats/ttest.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mathstats
     6  
     7  import (
     8  	"errors"
     9  	"math"
    10  )
    11  
    12  // A LocationHypothesis specifies the alternative hypothesis of a
    13  // location test such as a t-test or a Mann-Whitney U-test. The
    14  // default (zero) value is to test against the alternative hypothesis
    15  // that they differ.
    16  type LocationHypothesis int
    17  
    18  const (
    19  	// LocationLess specifies the alternative hypothesis that the
    20  	// location of the first sample is less than the second. This
    21  	// is a one-tailed test.
    22  	LocationLess LocationHypothesis = -1
    23  
    24  	// LocationDiffers specifies the alternative hypothesis that
    25  	// the locations of the two samples are not equal. This is a
    26  	// two-tailed test.
    27  	LocationDiffers LocationHypothesis = 0
    28  
    29  	// LocationGreater specifies the alternative hypothesis that
    30  	// the location of the first sample is greater than the
    31  	// second. This is a one-tailed test.
    32  	LocationGreater LocationHypothesis = 1
    33  )
    34  
    35  // A TTestResult is the result of a t-test.
    36  type TTestResult struct {
    37  	// N1 and N2 are the sizes of the input samples. For a
    38  	// one-sample t-test, N2 is 0.
    39  	N1, N2 int
    40  
    41  	// T is the value of the t-statistic for this t-test.
    42  	T float64
    43  
    44  	// DoF is the degrees of freedom for this t-test.
    45  	DoF float64
    46  
    47  	// AltHypothesis specifies the alternative hypothesis tested
    48  	// by this test against the null hypothesis that there is no
    49  	// difference in the means of the samples.
    50  	AltHypothesis LocationHypothesis
    51  
    52  	// P is p-value for this t-test for the given null hypothesis.
    53  	P float64
    54  }
    55  
    56  func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult {
    57  	dist := TDist{dof}
    58  	var p float64
    59  	switch alt {
    60  	case LocationDiffers:
    61  		p = 2 * (1 - dist.CDF(math.Abs(t)))
    62  	case LocationLess:
    63  		p = dist.CDF(t)
    64  	case LocationGreater:
    65  		p = 1 - dist.CDF(t)
    66  	}
    67  	return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p}
    68  }
    69  
    70  // A TTestSample is a sample that can be used for a one or two sample
    71  // t-test.
    72  type TTestSample interface {
    73  	Weight() float64
    74  	Mean() float64
    75  	Variance() float64
    76  }
    77  
    78  var (
    79  	ErrSampleSize        = errors.New("sample is too small")
    80  	ErrZeroVariance      = errors.New("sample has zero variance")
    81  	ErrMismatchedSamples = errors.New("samples have different lengths")
    82  )
    83  
    84  // TwoSampleTTest performs a two-sample (unpaired) Student's t-test on
    85  // samples x1 and x2. This is a test of the null hypothesis that x1
    86  // and x2 are drawn from populations with equal means. It assumes x1
    87  // and x2 are independent samples, that the distributions have equal
    88  // variance, and that the populations are normally distributed.
    89  func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
    90  	n1, n2 := x1.Weight(), x2.Weight()
    91  	if n1 == 0 || n2 == 0 {
    92  		return nil, ErrSampleSize
    93  	}
    94  	v1, v2 := x1.Variance(), x2.Variance()
    95  	if v1 == 0 && v2 == 0 {
    96  		return nil, ErrZeroVariance
    97  	}
    98  
    99  	dof := n1 + n2 - 2
   100  	v12 := ((n1-1)*v1 + (n2-1)*v2) / dof
   101  	t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2))
   102  	return newTTestResult(int(n1), int(n2), t, dof, alt), nil
   103  }
   104  
   105  // TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test
   106  // on samples x1 and x2. This is like TwoSampleTTest, but does not
   107  // assume the distributions have equal variance.
   108  func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
   109  	n1, n2 := x1.Weight(), x2.Weight()
   110  	if n1 <= 1 || n2 <= 1 {
   111  		// TODO: Can we still do this with n == 1?
   112  		return nil, ErrSampleSize
   113  	}
   114  	v1, v2 := x1.Variance(), x2.Variance()
   115  	if v1 == 0 && v2 == 0 {
   116  		return nil, ErrZeroVariance
   117  	}
   118  
   119  	dof := math.Pow(v1/n1+v2/n2, 2) /
   120  		(math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1))
   121  	s := math.Sqrt(v1/n1 + v2/n2)
   122  	t := (x1.Mean() - x2.Mean()) / s
   123  	return newTTestResult(int(n1), int(n2), t, dof, alt), nil
   124  }
   125  
   126  // PairedTTest performs a two-sample paired t-test on samples x1 and
   127  // x2. If μ0 is non-zero, this tests if the average of the difference
   128  // is significantly different from μ0. If x1 and x2 are identical,
   129  // this returns nil.
   130  func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
   131  	if len(x1) != len(x2) {
   132  		return nil, ErrMismatchedSamples
   133  	}
   134  	if len(x1) <= 1 {
   135  		// TODO: Can we still do this with n == 1?
   136  		return nil, ErrSampleSize
   137  	}
   138  
   139  	dof := float64(len(x1) - 1)
   140  
   141  	diff := make([]float64, len(x1))
   142  	for i := range x1 {
   143  		diff[i] = x1[i] - x2[i]
   144  	}
   145  	sd := StdDev(diff)
   146  	if sd == 0 {
   147  		// TODO: Can we still do the test?
   148  		return nil, ErrZeroVariance
   149  	}
   150  	t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd
   151  	return newTTestResult(len(x1), len(x2), t, dof, alt), nil
   152  }
   153  
   154  // OneSampleTTest performs a one-sample t-test on sample x. This tests
   155  // the null hypothesis that the population mean is equal to μ0. This
   156  // assumes the distribution of the population of sample means is
   157  // normal.
   158  func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
   159  	n, v := x.Weight(), x.Variance()
   160  	if n == 0 {
   161  		return nil, ErrSampleSize
   162  	}
   163  	if v == 0 {
   164  		// TODO: Can we still do the test?
   165  		return nil, ErrZeroVariance
   166  	}
   167  	dof := n - 1
   168  	t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v)
   169  	return newTTestResult(int(n), 0, t, dof, alt), nil
   170  }