github.com/jgbaldwinbrown/perf@v0.1.1/pkg/stats/dist.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package stats 6 7 import "math/rand" 8 9 // A DistCommon is a statistical distribution. DistCommon is a base 10 // interface provided by both continuous and discrete distributions. 11 type DistCommon interface { 12 // CDF returns the cumulative probability Pr[X <= x]. 13 // 14 // For continuous distributions, the CDF is the integral of 15 // the PDF from -inf to x. 16 // 17 // For discrete distributions, the CDF is the sum of the PMF 18 // at all defined points from -inf to x, inclusive. Note that 19 // the CDF of a discrete distribution is defined for the whole 20 // real line (unlike the PMF) but has discontinuities where 21 // the PMF is non-zero. 22 // 23 // The CDF is a monotonically increasing function and has a 24 // domain of all real numbers. If the distribution has bounded 25 // support, it has a range of [0, 1]; otherwise it has a range 26 // of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1. 27 CDF(x float64) float64 28 29 // Bounds returns reasonable bounds for this distribution's 30 // PDF/PMF and CDF. The total weight outside of these bounds 31 // should be approximately 0. 32 // 33 // For a discrete distribution, both bounds are integer 34 // multiples of Step(). 35 // 36 // If this distribution has finite support, it returns exact 37 // bounds l, h such that CDF(l')=0 for all l' < l and 38 // CDF(h')=1 for all h' >= h. 39 Bounds() (float64, float64) 40 } 41 42 // A Dist is a continuous statistical distribution. 43 type Dist interface { 44 DistCommon 45 46 // PDF returns the value of the probability density function 47 // of this distribution at x. 48 PDF(x float64) float64 49 } 50 51 // A DiscreteDist is a discrete statistical distribution. 52 // 53 // Most discrete distributions are defined only at integral values of 54 // the random variable. However, some are defined at other intervals, 55 // so this interface takes a float64 value for the random variable. 56 // The probability mass function rounds down to the nearest defined 57 // point. Note that float64 values can exactly represent integer 58 // values between ±2**53, so this generally shouldn't be an issue for 59 // integer-valued distributions (likewise, for half-integer-valued 60 // distributions, float64 can exactly represent all values between 61 // ±2**52). 62 type DiscreteDist interface { 63 DistCommon 64 65 // PMF returns the value of the probability mass function 66 // Pr[X = x'], where x' is x rounded down to the nearest 67 // defined point on the distribution. 68 // 69 // Note for implementers: for integer-valued distributions, 70 // round x using int(math.Floor(x)). Do not use int(x), since 71 // that truncates toward zero (unless all x <= 0 are handled 72 // the same). 73 PMF(x float64) float64 74 75 // Step returns s, where the distribution is defined for sℕ. 76 Step() float64 77 } 78 79 // TODO: Add a Support method for finite support distributions? Or 80 // maybe just another return value from Bounds indicating that the 81 // bounds are exact? 82 83 // TODO: Plot method to return a pre-configured Plot object with 84 // reasonable bounds and an integral function? Have to distinguish 85 // PDF/CDF/InvCDF. Three methods? Argument? 86 // 87 // Doesn't have to be a method of Dist. Could be just a function that 88 // takes a Dist and uses Bounds. 89 90 // InvCDF returns the inverse CDF function of the given distribution 91 // (also known as the quantile function or the percent point 92 // function). This is a function f such that f(dist.CDF(x)) == x. If 93 // dist.CDF is only weakly monotonic (that it, there are intervals 94 // over which it is constant) and y > 0, f returns the smallest x that 95 // satisfies this condition. In general, the inverse CDF is not 96 // well-defined for y==0, but for convenience if y==0, f returns the 97 // largest x that satisfies this condition. For distributions with 98 // infinite support both the largest and smallest x are -Inf; however, 99 // for distributions with finite support, this is the lower bound of 100 // the support. 101 // 102 // If y < 0 or y > 1, f returns NaN. 103 // 104 // If dist implements InvCDF(float64) float64, this returns that 105 // method. Otherwise, it returns a function that uses a generic 106 // numerical method to construct the inverse CDF at y by finding x 107 // such that dist.CDF(x) == y. This may have poor precision around 108 // points of discontinuity, including f(0) and f(1). 109 func InvCDF(dist DistCommon) func(y float64) (x float64) { 110 type invCDF interface { 111 InvCDF(float64) float64 112 } 113 if dist, ok := dist.(invCDF); ok { 114 return dist.InvCDF 115 } 116 117 // Otherwise, use a numerical algorithm. 118 // 119 // TODO: For discrete distributions, use the step size to 120 // inform this computation. 121 return func(y float64) (x float64) { 122 const almostInf = 1e100 123 const xtol = 1e-16 124 125 if y < 0 || y > 1 { 126 return nan 127 } else if y == 0 { 128 l, _ := dist.Bounds() 129 if dist.CDF(l) == 0 { 130 // Finite support 131 return l 132 } else { 133 // Infinite support 134 return -inf 135 } 136 } else if y == 1 { 137 _, h := dist.Bounds() 138 if dist.CDF(h) == 1 { 139 // Finite support 140 return h 141 } else { 142 // Infinite support 143 return inf 144 } 145 } 146 147 // Find loX, hiX for which cdf(loX) < y <= cdf(hiX). 148 var loX, loY, hiX, hiY float64 149 x1, y1 := 0.0, dist.CDF(0) 150 xdelta := 1.0 151 if y1 < y { 152 hiX, hiY = x1, y1 153 for hiY < y && hiX != inf { 154 loX, loY, hiX = hiX, hiY, hiX+xdelta 155 hiY = dist.CDF(hiX) 156 xdelta *= 2 157 } 158 } else { 159 loX, loY = x1, y1 160 for y <= loY && loX != -inf { 161 hiX, hiY, loX = loX, loY, loX-xdelta 162 loY = dist.CDF(loX) 163 xdelta *= 2 164 } 165 } 166 if loX == -inf { 167 return loX 168 } else if hiX == inf { 169 return hiX 170 } 171 172 // Use bisection on the interval to find the smallest 173 // x at which cdf(x) <= y. 174 _, x = bisectBool(func(x float64) bool { 175 return dist.CDF(x) < y 176 }, loX, hiX, xtol) 177 return 178 } 179 } 180 181 // Rand returns a random number generator that draws from the given 182 // distribution. The returned generator takes an optional source of 183 // randomness; if this is nil, it uses the default global source. 184 // 185 // If dist implements Rand(*rand.Rand) float64, Rand returns that 186 // method. Otherwise, it returns a generic generator based on dist's 187 // inverse CDF (which may in turn use an efficient implementation or a 188 // generic numerical implementation; see InvCDF). 189 func Rand(dist DistCommon) func(*rand.Rand) float64 { 190 type distRand interface { 191 Rand(*rand.Rand) float64 192 } 193 if dist, ok := dist.(distRand); ok { 194 return dist.Rand 195 } 196 197 // Otherwise, use a generic algorithm. 198 inv := InvCDF(dist) 199 return func(r *rand.Rand) float64 { 200 var y float64 201 for y == 0 { 202 if r == nil { 203 y = rand.Float64() 204 } else { 205 y = r.Float64() 206 } 207 } 208 return inv(y) 209 } 210 }