gonum.org/v1/gonum@v0.14.0/stat/distuv/distribution_test.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package distuv 6 7 import ( 8 "math" 9 "testing" 10 11 "gonum.org/v1/gonum/floats" 12 "gonum.org/v1/gonum/floats/scalar" 13 "gonum.org/v1/gonum/integrate/quad" 14 "gonum.org/v1/gonum/stat" 15 ) 16 17 type meaner interface { 18 Mean() float64 19 } 20 21 type quantiler interface { 22 Quantile(float64) float64 23 } 24 25 type medianer interface { 26 quantiler 27 Median() float64 28 } 29 30 type varStder interface { 31 StdDev() float64 32 Variance() float64 33 } 34 35 type entropyer interface { 36 LogProber 37 Entropy() float64 38 } 39 40 type exKurtosiser interface { 41 ExKurtosis() float64 42 Mean() float64 43 } 44 45 type skewnesser interface { 46 StdDev() float64 47 Mean() float64 48 Skewness() float64 49 } 50 51 type cumulanter interface { 52 Quantiler 53 CDF(x float64) float64 54 Survival(x float64) float64 55 } 56 57 func generateSamples(x []float64, r Rander) { 58 for i := range x { 59 x[i] = r.Rand() 60 } 61 } 62 63 type probLogprober interface { 64 Prob(x float64) float64 65 LogProb(x float64) float64 66 } 67 68 type cumulantProber interface { 69 cumulanter 70 probLogprober 71 } 72 73 func checkMean(t *testing.T, cas int, x []float64, m meaner, tol float64) { 74 t.Helper() 75 mean := stat.Mean(x, nil) 76 if !scalar.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) { 77 t.Errorf("Mean mismatch case %v: want: %v, got: %v", cas, mean, m.Mean()) 78 } 79 } 80 81 func checkMedian(t *testing.T, cas int, x []float64, m medianer, tol float64) { 82 t.Helper() 83 median := stat.Quantile(0.5, stat.Empirical, x, nil) 84 if !scalar.EqualWithinAbsOrRel(median, m.Median(), tol, tol) { 85 t.Errorf("Median mismatch case %v: want: %v, got: %v", cas, median, m.Median()) 86 } 87 } 88 89 func checkVarAndStd(t *testing.T, cas int, x []float64, v varStder, tol float64) { 90 t.Helper() 91 variance := stat.Variance(x, nil) 92 if !scalar.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) { 93 t.Errorf("Variance mismatch case %v: want: %v, got: %v", cas, variance, v.Variance()) 94 } 95 std := math.Sqrt(variance) 96 if !scalar.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) { 97 t.Errorf("StdDev mismatch case %v: want: %v, got: %v", cas, std, v.StdDev()) 98 } 99 } 100 101 func checkEntropy(t *testing.T, cas int, x []float64, e entropyer, tol float64) { 102 t.Helper() 103 tmp := make([]float64, len(x)) 104 for i, v := range x { 105 tmp[i] = -e.LogProb(v) 106 } 107 entropy := stat.Mean(tmp, nil) 108 if !scalar.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) { 109 t.Errorf("Entropy mismatch case %v: want: %v, got: %v", cas, entropy, e.Entropy()) 110 } 111 } 112 113 func checkExKurtosis(t *testing.T, cas int, x []float64, e exKurtosiser, tol float64) { 114 t.Helper() 115 mean := e.Mean() 116 tmp := make([]float64, len(x)) 117 for i, x := range x { 118 tmp[i] = math.Pow(x-mean, 4) 119 } 120 variance := stat.Variance(x, nil) 121 mu4 := stat.Mean(tmp, nil) 122 kurtosis := mu4/(variance*variance) - 3 123 if !scalar.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) { 124 t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", cas, kurtosis, e.ExKurtosis()) 125 } 126 } 127 128 func checkSkewness(t *testing.T, cas int, x []float64, s skewnesser, tol float64) { 129 t.Helper() 130 mean := s.Mean() 131 std := s.StdDev() 132 tmp := make([]float64, len(x)) 133 for i, v := range x { 134 tmp[i] = math.Pow(v-mean, 3) 135 } 136 mu3 := stat.Mean(tmp, nil) 137 skewness := mu3 / math.Pow(std, 3) 138 if !scalar.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) { 139 t.Errorf("Skewness mismatch case %v: want: %v, got: %v", cas, skewness, s.Skewness()) 140 } 141 } 142 143 func checkQuantileCDFSurvival(t *testing.T, cas int, xs []float64, c cumulanter, tol float64) { 144 t.Helper() 145 // Quantile, CDF, and survival check. 146 for _, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} { 147 x := c.Quantile(p) 148 cdf := c.CDF(x) 149 estCDF := stat.CDF(x, stat.Empirical, xs, nil) 150 if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) { 151 t.Errorf("CDF mismatch case %v: want: %v, got: %v", cas, estCDF, cdf) 152 } 153 if !scalar.EqualWithinAbsOrRel(cdf, p, tol, tol) { 154 t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", cas, p, cdf) 155 } 156 if math.Abs(1-cdf-c.Survival(x)) > 1e-14 { 157 t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", cas, 1-cdf, c.Survival(x)) 158 } 159 } 160 if !panics(func() { c.Quantile(-0.0001) }) { 161 t.Errorf("Expected panic with negative argument to Quantile") 162 } 163 if !panics(func() { c.Quantile(1.0001) }) { 164 t.Errorf("Expected panic with Quantile argument above 1") 165 } 166 } 167 168 // checkProbContinuous checks that the PDF is consistent with LogPDF 169 // and integrates to 1 from the lower to upper bound. 170 func checkProbContinuous(t *testing.T, cas int, x []float64, lower float64, upper float64, p probLogprober, tol float64) { 171 t.Helper() 172 q := quad.Fixed(p.Prob, lower, upper, 1000000, nil, 0) 173 if math.Abs(q-1) > tol { 174 t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", cas, q) 175 } 176 177 // Check that PDF and LogPDF are consistent. 178 for _, v := range x { 179 if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 { 180 t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", cas, v, math.Log(v), p.LogProb(v)) 181 break 182 } 183 } 184 } 185 186 // checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent. 187 // checkProbContinuous only checks that Prob is a valid distribution (integrates 188 // to 1 and greater than 0). However, this is also true if the PDF of a different 189 // distribution is used. This checks that PDF is also consistent with the 190 // CDF implementation and the random samples. 191 func checkProbQuantContinuous(t *testing.T, cas int, xs []float64, c cumulantProber, tol float64) { 192 t.Helper() 193 ps := make([]float64, 101) 194 floats.Span(ps, 0, 1) 195 196 var xp, x float64 197 for i, p := range ps { 198 x = c.Quantile(p) 199 if p == 0 { 200 xp = x 201 if floats.Min(xs) < x { 202 t.Errorf("Sample of x less than Quantile(0). Case %v.", cas) 203 break 204 } 205 continue 206 } 207 if p == 1 { 208 if floats.Max(xs) > x { 209 t.Errorf("Sample of x greater than Quantile(1). Case %v.", cas) 210 break 211 } 212 } 213 214 // The integral of the PDF between xp and x should be the difference in 215 // the quantiles. 216 q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0) 217 if math.Abs(q-(p-ps[i-1])) > 1e-5 { 218 t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", cas, p-ps[i-1], q) 219 break 220 } 221 222 pEst := stat.CDF(x, stat.Empirical, xs, nil) 223 if math.Abs(pEst-p) > tol { 224 t.Errorf("Empirical CDF doesn't match quantile. Case %v.", cas) 225 } 226 xp = x 227 } 228 } 229 230 type moder interface { 231 Mode() float64 232 } 233 234 func checkMode(t *testing.T, cas int, xs []float64, m moder, dx float64, tol float64) { 235 t.Helper() 236 rXs := make([]float64, len(xs)) 237 for j, x := range xs { 238 rXs[j] = math.RoundToEven(x/dx) * dx 239 } 240 want, _ := stat.Mode(rXs, nil) 241 got := m.Mode() 242 if !scalar.EqualWithinAbs(want, got, tol) { 243 t.Errorf("Mode mismatch case %d: want %g, got %v", cas, want, got) 244 } 245 } 246 247 // checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions. 248 func checkProbDiscrete(t *testing.T, cas int, xs []float64, p probLogprober, tol float64) { 249 t.Helper() 250 // Make a map of all of the unique samples. 251 m := make(map[float64]int) 252 for _, v := range xs { 253 m[v]++ 254 } 255 for x, count := range m { 256 prob := float64(count) / float64(len(xs)) 257 if math.Abs(prob-p.Prob(x)) > tol { 258 t.Errorf("PDF mismatch case %v at %v: want %v, got %v", cas, x, prob, p.Prob(x)) 259 } 260 if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 { 261 t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", cas, x, math.Log(x), p.LogProb(x)) 262 } 263 } 264 } 265 266 // testRandLogProb tests that LogProb and Rand give consistent results. This 267 // can be used when the distribution does not implement CDF. 268 func testRandLogProbContinuous(t *testing.T, cas int, min float64, x []float64, f LogProber, tol float64, bins int) { 269 t.Helper() 270 for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) { 271 // Get the estimated CDF from the samples 272 pt := stat.Quantile(cdf, stat.Empirical, x, nil) 273 274 prob := func(x float64) float64 { 275 return math.Exp(f.LogProb(x)) 276 } 277 // Integrate the PDF to find the CDF 278 estCDF := quad.Fixed(prob, min, pt, 10000, nil, 0) 279 if !scalar.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) { 280 t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", cas, cdf, estCDF) 281 } 282 } 283 } 284 285 func panics(fun func()) (b bool) { 286 defer func() { 287 err := recover() 288 if err != nil { 289 b = true 290 } 291 }() 292 fun() 293 return 294 }