gonum.org/v1/gonum@v0.14.0/stat/distuv/distribution_test.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package distuv
     6  
     7  import (
     8  	"math"
     9  	"testing"
    10  
    11  	"gonum.org/v1/gonum/floats"
    12  	"gonum.org/v1/gonum/floats/scalar"
    13  	"gonum.org/v1/gonum/integrate/quad"
    14  	"gonum.org/v1/gonum/stat"
    15  )
    16  
    17  type meaner interface {
    18  	Mean() float64
    19  }
    20  
    21  type quantiler interface {
    22  	Quantile(float64) float64
    23  }
    24  
    25  type medianer interface {
    26  	quantiler
    27  	Median() float64
    28  }
    29  
    30  type varStder interface {
    31  	StdDev() float64
    32  	Variance() float64
    33  }
    34  
    35  type entropyer interface {
    36  	LogProber
    37  	Entropy() float64
    38  }
    39  
    40  type exKurtosiser interface {
    41  	ExKurtosis() float64
    42  	Mean() float64
    43  }
    44  
    45  type skewnesser interface {
    46  	StdDev() float64
    47  	Mean() float64
    48  	Skewness() float64
    49  }
    50  
    51  type cumulanter interface {
    52  	Quantiler
    53  	CDF(x float64) float64
    54  	Survival(x float64) float64
    55  }
    56  
    57  func generateSamples(x []float64, r Rander) {
    58  	for i := range x {
    59  		x[i] = r.Rand()
    60  	}
    61  }
    62  
    63  type probLogprober interface {
    64  	Prob(x float64) float64
    65  	LogProb(x float64) float64
    66  }
    67  
    68  type cumulantProber interface {
    69  	cumulanter
    70  	probLogprober
    71  }
    72  
    73  func checkMean(t *testing.T, cas int, x []float64, m meaner, tol float64) {
    74  	t.Helper()
    75  	mean := stat.Mean(x, nil)
    76  	if !scalar.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
    77  		t.Errorf("Mean mismatch case %v: want: %v, got: %v", cas, mean, m.Mean())
    78  	}
    79  }
    80  
    81  func checkMedian(t *testing.T, cas int, x []float64, m medianer, tol float64) {
    82  	t.Helper()
    83  	median := stat.Quantile(0.5, stat.Empirical, x, nil)
    84  	if !scalar.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
    85  		t.Errorf("Median mismatch case %v: want: %v, got: %v", cas, median, m.Median())
    86  	}
    87  }
    88  
    89  func checkVarAndStd(t *testing.T, cas int, x []float64, v varStder, tol float64) {
    90  	t.Helper()
    91  	variance := stat.Variance(x, nil)
    92  	if !scalar.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
    93  		t.Errorf("Variance mismatch case %v: want: %v, got: %v", cas, variance, v.Variance())
    94  	}
    95  	std := math.Sqrt(variance)
    96  	if !scalar.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
    97  		t.Errorf("StdDev mismatch case %v: want: %v, got: %v", cas, std, v.StdDev())
    98  	}
    99  }
   100  
   101  func checkEntropy(t *testing.T, cas int, x []float64, e entropyer, tol float64) {
   102  	t.Helper()
   103  	tmp := make([]float64, len(x))
   104  	for i, v := range x {
   105  		tmp[i] = -e.LogProb(v)
   106  	}
   107  	entropy := stat.Mean(tmp, nil)
   108  	if !scalar.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
   109  		t.Errorf("Entropy mismatch case %v: want: %v, got: %v", cas, entropy, e.Entropy())
   110  	}
   111  }
   112  
   113  func checkExKurtosis(t *testing.T, cas int, x []float64, e exKurtosiser, tol float64) {
   114  	t.Helper()
   115  	mean := e.Mean()
   116  	tmp := make([]float64, len(x))
   117  	for i, x := range x {
   118  		tmp[i] = math.Pow(x-mean, 4)
   119  	}
   120  	variance := stat.Variance(x, nil)
   121  	mu4 := stat.Mean(tmp, nil)
   122  	kurtosis := mu4/(variance*variance) - 3
   123  	if !scalar.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
   124  		t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", cas, kurtosis, e.ExKurtosis())
   125  	}
   126  }
   127  
   128  func checkSkewness(t *testing.T, cas int, x []float64, s skewnesser, tol float64) {
   129  	t.Helper()
   130  	mean := s.Mean()
   131  	std := s.StdDev()
   132  	tmp := make([]float64, len(x))
   133  	for i, v := range x {
   134  		tmp[i] = math.Pow(v-mean, 3)
   135  	}
   136  	mu3 := stat.Mean(tmp, nil)
   137  	skewness := mu3 / math.Pow(std, 3)
   138  	if !scalar.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
   139  		t.Errorf("Skewness mismatch case %v: want: %v, got: %v", cas, skewness, s.Skewness())
   140  	}
   141  }
   142  
   143  func checkQuantileCDFSurvival(t *testing.T, cas int, xs []float64, c cumulanter, tol float64) {
   144  	t.Helper()
   145  	// Quantile, CDF, and survival check.
   146  	for _, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
   147  		x := c.Quantile(p)
   148  		cdf := c.CDF(x)
   149  		estCDF := stat.CDF(x, stat.Empirical, xs, nil)
   150  		if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
   151  			t.Errorf("CDF mismatch case %v: want: %v, got: %v", cas, estCDF, cdf)
   152  		}
   153  		if !scalar.EqualWithinAbsOrRel(cdf, p, tol, tol) {
   154  			t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", cas, p, cdf)
   155  		}
   156  		if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
   157  			t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", cas, 1-cdf, c.Survival(x))
   158  		}
   159  	}
   160  	if !panics(func() { c.Quantile(-0.0001) }) {
   161  		t.Errorf("Expected panic with negative argument to Quantile")
   162  	}
   163  	if !panics(func() { c.Quantile(1.0001) }) {
   164  		t.Errorf("Expected panic with Quantile argument above 1")
   165  	}
   166  }
   167  
   168  // checkProbContinuous checks that the PDF is consistent with LogPDF
   169  // and integrates to 1 from the lower to upper bound.
   170  func checkProbContinuous(t *testing.T, cas int, x []float64, lower float64, upper float64, p probLogprober, tol float64) {
   171  	t.Helper()
   172  	q := quad.Fixed(p.Prob, lower, upper, 1000000, nil, 0)
   173  	if math.Abs(q-1) > tol {
   174  		t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", cas, q)
   175  	}
   176  
   177  	// Check that PDF and LogPDF are consistent.
   178  	for _, v := range x {
   179  		if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
   180  			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", cas, v, math.Log(v), p.LogProb(v))
   181  			break
   182  		}
   183  	}
   184  }
   185  
   186  // checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
   187  // checkProbContinuous only checks that Prob is a valid distribution (integrates
   188  // to 1 and greater than 0). However, this is also true if the PDF of a different
   189  // distribution is used. This checks that PDF is also consistent with the
   190  // CDF implementation and the random samples.
   191  func checkProbQuantContinuous(t *testing.T, cas int, xs []float64, c cumulantProber, tol float64) {
   192  	t.Helper()
   193  	ps := make([]float64, 101)
   194  	floats.Span(ps, 0, 1)
   195  
   196  	var xp, x float64
   197  	for i, p := range ps {
   198  		x = c.Quantile(p)
   199  		if p == 0 {
   200  			xp = x
   201  			if floats.Min(xs) < x {
   202  				t.Errorf("Sample of x less than Quantile(0). Case %v.", cas)
   203  				break
   204  			}
   205  			continue
   206  		}
   207  		if p == 1 {
   208  			if floats.Max(xs) > x {
   209  				t.Errorf("Sample of x greater than Quantile(1). Case %v.", cas)
   210  				break
   211  			}
   212  		}
   213  
   214  		// The integral of the PDF between xp and x should be the difference in
   215  		// the quantiles.
   216  		q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
   217  		if math.Abs(q-(p-ps[i-1])) > 1e-5 {
   218  			t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", cas, p-ps[i-1], q)
   219  			break
   220  		}
   221  
   222  		pEst := stat.CDF(x, stat.Empirical, xs, nil)
   223  		if math.Abs(pEst-p) > tol {
   224  			t.Errorf("Empirical CDF doesn't match quantile. Case %v.", cas)
   225  		}
   226  		xp = x
   227  	}
   228  }
   229  
   230  type moder interface {
   231  	Mode() float64
   232  }
   233  
   234  func checkMode(t *testing.T, cas int, xs []float64, m moder, dx float64, tol float64) {
   235  	t.Helper()
   236  	rXs := make([]float64, len(xs))
   237  	for j, x := range xs {
   238  		rXs[j] = math.RoundToEven(x/dx) * dx
   239  	}
   240  	want, _ := stat.Mode(rXs, nil)
   241  	got := m.Mode()
   242  	if !scalar.EqualWithinAbs(want, got, tol) {
   243  		t.Errorf("Mode mismatch case %d: want %g, got %v", cas, want, got)
   244  	}
   245  }
   246  
   247  // checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions.
   248  func checkProbDiscrete(t *testing.T, cas int, xs []float64, p probLogprober, tol float64) {
   249  	t.Helper()
   250  	// Make a map of all of the unique samples.
   251  	m := make(map[float64]int)
   252  	for _, v := range xs {
   253  		m[v]++
   254  	}
   255  	for x, count := range m {
   256  		prob := float64(count) / float64(len(xs))
   257  		if math.Abs(prob-p.Prob(x)) > tol {
   258  			t.Errorf("PDF mismatch case %v at %v: want %v, got %v", cas, x, prob, p.Prob(x))
   259  		}
   260  		if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
   261  			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", cas, x, math.Log(x), p.LogProb(x))
   262  		}
   263  	}
   264  }
   265  
   266  // testRandLogProb tests that LogProb and Rand give consistent results. This
   267  // can be used when the distribution does not implement CDF.
   268  func testRandLogProbContinuous(t *testing.T, cas int, min float64, x []float64, f LogProber, tol float64, bins int) {
   269  	t.Helper()
   270  	for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
   271  		// Get the estimated CDF from the samples
   272  		pt := stat.Quantile(cdf, stat.Empirical, x, nil)
   273  
   274  		prob := func(x float64) float64 {
   275  			return math.Exp(f.LogProb(x))
   276  		}
   277  		// Integrate the PDF to find the CDF
   278  		estCDF := quad.Fixed(prob, min, pt, 10000, nil, 0)
   279  		if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
   280  			t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", cas, cdf, estCDF)
   281  		}
   282  	}
   283  }
   284  
   285  func panics(fun func()) (b bool) {
   286  	defer func() {
   287  		err := recover()
   288  		if err != nil {
   289  			b = true
   290  		}
   291  	}()
   292  	fun()
   293  	return
   294  }