github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/distuv/distribution_test.go

github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/distuv/distribution_test.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package distuv
     6  
     7  import (
     8  	"math"
     9  	"testing"
    10  
    11  	"github.com/jingcheng-WU/gonum/floats"
    12  	"github.com/jingcheng-WU/gonum/floats/scalar"
    13  	"github.com/jingcheng-WU/gonum/integrate/quad"
    14  	"github.com/jingcheng-WU/gonum/stat"
    15  )
    16  
    17  type meaner interface {
    18  	Mean() float64
    19  }
    20  
    21  type quantiler interface {
    22  	Quantile(float64) float64
    23  }
    24  
    25  type medianer interface {
    26  	quantiler
    27  	Median() float64
    28  }
    29  
    30  type varStder interface {
    31  	StdDev() float64
    32  	Variance() float64
    33  }
    34  
    35  type entropyer interface {
    36  	LogProber
    37  	Entropy() float64
    38  }
    39  
    40  type exKurtosiser interface {
    41  	ExKurtosis() float64
    42  	Mean() float64
    43  }
    44  
    45  type skewnesser interface {
    46  	StdDev() float64
    47  	Mean() float64
    48  	Skewness() float64
    49  }
    50  
    51  type cumulanter interface {
    52  	Quantiler
    53  	CDF(x float64) float64
    54  	Survival(x float64) float64
    55  }
    56  
    57  func generateSamples(x []float64, r Rander) {
    58  	for i := range x {
    59  		x[i] = r.Rand()
    60  	}
    61  }
    62  
    63  type probLogprober interface {
    64  	Prob(x float64) float64
    65  	LogProb(x float64) float64
    66  }
    67  
    68  type cumulantProber interface {
    69  	cumulanter
    70  	probLogprober
    71  }
    72  
    73  func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) {
    74  	mean := stat.Mean(x, nil)
    75  	if !scalar.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
    76  		t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean())
    77  	}
    78  }
    79  
    80  func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) {
    81  	median := stat.Quantile(0.5, stat.Empirical, x, nil)
    82  	if !scalar.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
    83  		t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median())
    84  	}
    85  }
    86  
    87  func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) {
    88  	variance := stat.Variance(x, nil)
    89  	if !scalar.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
    90  		t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance())
    91  	}
    92  	std := math.Sqrt(variance)
    93  	if !scalar.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
    94  		t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev())
    95  	}
    96  }
    97  
    98  func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) {
    99  	tmp := make([]float64, len(x))
   100  	for i, v := range x {
   101  		tmp[i] = -e.LogProb(v)
   102  	}
   103  	entropy := stat.Mean(tmp, nil)
   104  	if !scalar.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
   105  		t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy())
   106  	}
   107  }
   108  
   109  func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) {
   110  	mean := e.Mean()
   111  	tmp := make([]float64, len(x))
   112  	for i, x := range x {
   113  		tmp[i] = math.Pow(x-mean, 4)
   114  	}
   115  	variance := stat.Variance(x, nil)
   116  	mu4 := stat.Mean(tmp, nil)
   117  	kurtosis := mu4/(variance*variance) - 3
   118  	if !scalar.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
   119  		t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis())
   120  	}
   121  }
   122  
   123  func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) {
   124  	mean := s.Mean()
   125  	std := s.StdDev()
   126  	tmp := make([]float64, len(x))
   127  	for i, v := range x {
   128  		tmp[i] = math.Pow(v-mean, 3)
   129  	}
   130  	mu3 := stat.Mean(tmp, nil)
   131  	skewness := mu3 / math.Pow(std, 3)
   132  	if !scalar.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
   133  		t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness())
   134  	}
   135  }
   136  
   137  func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) {
   138  	// Quantile, CDF, and survival check.
   139  	for i, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
   140  		x := c.Quantile(p)
   141  		cdf := c.CDF(x)
   142  		estCDF := stat.CDF(x, stat.Empirical, xs, nil)
   143  		if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
   144  			t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf)
   145  		}
   146  		if !scalar.EqualWithinAbsOrRel(cdf, p, tol, tol) {
   147  			t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf)
   148  		}
   149  		if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
   150  			t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x))
   151  		}
   152  	}
   153  	if !panics(func() { c.Quantile(-0.0001) }) {
   154  		t.Errorf("Expected panic with negative argument to Quantile")
   155  	}
   156  	if !panics(func() { c.Quantile(1.0001) }) {
   157  		t.Errorf("Expected panic with Quantile argument above 1")
   158  	}
   159  }
   160  
   161  // checkProbContinuous checks that the PDF is consistent with LogPDF
   162  // and integrates to 1 from the lower to upper bound.
   163  func checkProbContinuous(t *testing.T, i int, x []float64, lower float64, upper float64, p probLogprober, tol float64) {
   164  	q := quad.Fixed(p.Prob, lower, upper, 1000000, nil, 0)
   165  	if math.Abs(q-1) > tol {
   166  		t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q)
   167  	}
   168  
   169  	// Check that PDF and LogPDF are consistent.
   170  	for i, v := range x {
   171  		if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
   172  			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(v), p.LogProb(v))
   173  			break
   174  		}
   175  	}
   176  }
   177  
   178  // checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
   179  // checkProbContinuous only checks that Prob is a valid distribution (integrates
   180  // to 1 and greater than 0). However, this is also true if the PDF of a different
   181  // distribution is used. This checks that PDF is also consistent with the
   182  // CDF implementation and the random samples.
   183  func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) {
   184  	ps := make([]float64, 101)
   185  	floats.Span(ps, 0, 1)
   186  
   187  	var xp, x float64
   188  	for i, p := range ps {
   189  		x = c.Quantile(p)
   190  		if p == 0 {
   191  			xp = x
   192  			if floats.Min(xs) < x {
   193  				t.Errorf("Sample of x less than Quantile(0). Case %v.", i)
   194  				break
   195  			}
   196  			continue
   197  		}
   198  		if p == 1 {
   199  			if floats.Max(xs) > x {
   200  				t.Errorf("Sample of x greater than Quantile(1). Case %v.", i)
   201  				break
   202  			}
   203  		}
   204  
   205  		// The integral of the PDF between xp and x should be the difference in
   206  		// the quantiles.
   207  		q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
   208  		if math.Abs(q-(p-ps[i-1])) > 1e-5 {
   209  			t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q)
   210  			break
   211  		}
   212  
   213  		pEst := stat.CDF(x, stat.Empirical, xs, nil)
   214  		if math.Abs(pEst-p) > tol {
   215  			t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i)
   216  		}
   217  		xp = x
   218  	}
   219  }
   220  
   221  type moder interface {
   222  	Mode() float64
   223  }
   224  
   225  func checkMode(t *testing.T, i int, xs []float64, m moder, dx float64, tol float64) {
   226  	rXs := make([]float64, len(xs))
   227  	for j, x := range xs {
   228  		rXs[j] = math.RoundToEven(x/dx) * dx
   229  	}
   230  	want, _ := stat.Mode(rXs, nil)
   231  	got := m.Mode()
   232  	if !scalar.EqualWithinAbs(want, got, tol) {
   233  		t.Errorf("Mode mismatch case %d: want %g, got %v", i, want, got)
   234  	}
   235  }
   236  
   237  // checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions.
   238  func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) {
   239  	// Make a map of all of the unique samples.
   240  	m := make(map[float64]int)
   241  	for _, v := range xs {
   242  		m[v]++
   243  	}
   244  	for x, count := range m {
   245  		prob := float64(count) / float64(len(xs))
   246  		if math.Abs(prob-p.Prob(x)) > tol {
   247  			t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x))
   248  		}
   249  		if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
   250  			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(x), p.LogProb(x))
   251  		}
   252  	}
   253  }
   254  
   255  // testRandLogProb tests that LogProb and Rand give consistent results. This
   256  // can be used when the distribution does not implement CDF.
   257  func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) {
   258  	for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
   259  		// Get the estimated CDF from the samples
   260  		pt := stat.Quantile(cdf, stat.Empirical, x, nil)
   261  
   262  		prob := func(x float64) float64 {
   263  			return math.Exp(f.LogProb(x))
   264  		}
   265  		// Integrate the PDF to find the CDF
   266  		estCDF := quad.Fixed(prob, min, pt, 10000, nil, 0)
   267  		if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
   268  			t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF)
   269  		}
   270  	}
   271  }
   272  
   273  func panics(fun func()) (b bool) {
   274  	defer func() {
   275  		err := recover()
   276  		if err != nil {
   277  			b = true
   278  		}
   279  	}()
   280  	fun()
   281  	return
   282  }