github.com/jgbaldwinbrown/perf@v0.1.1/pkg/stats/dist.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package stats
     6  
     7  import "math/rand"
     8  
     9  // A DistCommon is a statistical distribution. DistCommon is a base
    10  // interface provided by both continuous and discrete distributions.
    11  type DistCommon interface {
    12  	// CDF returns the cumulative probability Pr[X <= x].
    13  	//
    14  	// For continuous distributions, the CDF is the integral of
    15  	// the PDF from -inf to x.
    16  	//
    17  	// For discrete distributions, the CDF is the sum of the PMF
    18  	// at all defined points from -inf to x, inclusive. Note that
    19  	// the CDF of a discrete distribution is defined for the whole
    20  	// real line (unlike the PMF) but has discontinuities where
    21  	// the PMF is non-zero.
    22  	//
    23  	// The CDF is a monotonically increasing function and has a
    24  	// domain of all real numbers. If the distribution has bounded
    25  	// support, it has a range of [0, 1]; otherwise it has a range
    26  	// of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1.
    27  	CDF(x float64) float64
    28  
    29  	// Bounds returns reasonable bounds for this distribution's
    30  	// PDF/PMF and CDF. The total weight outside of these bounds
    31  	// should be approximately 0.
    32  	//
    33  	// For a discrete distribution, both bounds are integer
    34  	// multiples of Step().
    35  	//
    36  	// If this distribution has finite support, it returns exact
    37  	// bounds l, h such that CDF(l')=0 for all l' < l and
    38  	// CDF(h')=1 for all h' >= h.
    39  	Bounds() (float64, float64)
    40  }
    41  
    42  // A Dist is a continuous statistical distribution.
    43  type Dist interface {
    44  	DistCommon
    45  
    46  	// PDF returns the value of the probability density function
    47  	// of this distribution at x.
    48  	PDF(x float64) float64
    49  }
    50  
    51  // A DiscreteDist is a discrete statistical distribution.
    52  //
    53  // Most discrete distributions are defined only at integral values of
    54  // the random variable. However, some are defined at other intervals,
    55  // so this interface takes a float64 value for the random variable.
    56  // The probability mass function rounds down to the nearest defined
    57  // point. Note that float64 values can exactly represent integer
    58  // values between ±2**53, so this generally shouldn't be an issue for
    59  // integer-valued distributions (likewise, for half-integer-valued
    60  // distributions, float64 can exactly represent all values between
    61  // ±2**52).
    62  type DiscreteDist interface {
    63  	DistCommon
    64  
    65  	// PMF returns the value of the probability mass function
    66  	// Pr[X = x'], where x' is x rounded down to the nearest
    67  	// defined point on the distribution.
    68  	//
    69  	// Note for implementers: for integer-valued distributions,
    70  	// round x using int(math.Floor(x)). Do not use int(x), since
    71  	// that truncates toward zero (unless all x <= 0 are handled
    72  	// the same).
    73  	PMF(x float64) float64
    74  
    75  	// Step returns s, where the distribution is defined for sℕ.
    76  	Step() float64
    77  }
    78  
    79  // TODO: Add a Support method for finite support distributions? Or
    80  // maybe just another return value from Bounds indicating that the
    81  // bounds are exact?
    82  
    83  // TODO: Plot method to return a pre-configured Plot object with
    84  // reasonable bounds and an integral function? Have to distinguish
    85  // PDF/CDF/InvCDF. Three methods? Argument?
    86  //
    87  // Doesn't have to be a method of Dist. Could be just a function that
    88  // takes a Dist and uses Bounds.
    89  
    90  // InvCDF returns the inverse CDF function of the given distribution
    91  // (also known as the quantile function or the percent point
    92  // function). This is a function f such that f(dist.CDF(x)) == x. If
    93  // dist.CDF is only weakly monotonic (that it, there are intervals
    94  // over which it is constant) and y > 0, f returns the smallest x that
    95  // satisfies this condition. In general, the inverse CDF is not
    96  // well-defined for y==0, but for convenience if y==0, f returns the
    97  // largest x that satisfies this condition. For distributions with
    98  // infinite support both the largest and smallest x are -Inf; however,
    99  // for distributions with finite support, this is the lower bound of
   100  // the support.
   101  //
   102  // If y < 0 or y > 1, f returns NaN.
   103  //
   104  // If dist implements InvCDF(float64) float64, this returns that
   105  // method. Otherwise, it returns a function that uses a generic
   106  // numerical method to construct the inverse CDF at y by finding x
   107  // such that dist.CDF(x) == y. This may have poor precision around
   108  // points of discontinuity, including f(0) and f(1).
   109  func InvCDF(dist DistCommon) func(y float64) (x float64) {
   110  	type invCDF interface {
   111  		InvCDF(float64) float64
   112  	}
   113  	if dist, ok := dist.(invCDF); ok {
   114  		return dist.InvCDF
   115  	}
   116  
   117  	// Otherwise, use a numerical algorithm.
   118  	//
   119  	// TODO: For discrete distributions, use the step size to
   120  	// inform this computation.
   121  	return func(y float64) (x float64) {
   122  		const almostInf = 1e100
   123  		const xtol = 1e-16
   124  
   125  		if y < 0 || y > 1 {
   126  			return nan
   127  		} else if y == 0 {
   128  			l, _ := dist.Bounds()
   129  			if dist.CDF(l) == 0 {
   130  				// Finite support
   131  				return l
   132  			} else {
   133  				// Infinite support
   134  				return -inf
   135  			}
   136  		} else if y == 1 {
   137  			_, h := dist.Bounds()
   138  			if dist.CDF(h) == 1 {
   139  				// Finite support
   140  				return h
   141  			} else {
   142  				// Infinite support
   143  				return inf
   144  			}
   145  		}
   146  
   147  		// Find loX, hiX for which cdf(loX) < y <= cdf(hiX).
   148  		var loX, loY, hiX, hiY float64
   149  		x1, y1 := 0.0, dist.CDF(0)
   150  		xdelta := 1.0
   151  		if y1 < y {
   152  			hiX, hiY = x1, y1
   153  			for hiY < y && hiX != inf {
   154  				loX, loY, hiX = hiX, hiY, hiX+xdelta
   155  				hiY = dist.CDF(hiX)
   156  				xdelta *= 2
   157  			}
   158  		} else {
   159  			loX, loY = x1, y1
   160  			for y <= loY && loX != -inf {
   161  				hiX, hiY, loX = loX, loY, loX-xdelta
   162  				loY = dist.CDF(loX)
   163  				xdelta *= 2
   164  			}
   165  		}
   166  		if loX == -inf {
   167  			return loX
   168  		} else if hiX == inf {
   169  			return hiX
   170  		}
   171  
   172  		// Use bisection on the interval to find the smallest
   173  		// x at which cdf(x) <= y.
   174  		_, x = bisectBool(func(x float64) bool {
   175  			return dist.CDF(x) < y
   176  		}, loX, hiX, xtol)
   177  		return
   178  	}
   179  }
   180  
   181  // Rand returns a random number generator that draws from the given
   182  // distribution. The returned generator takes an optional source of
   183  // randomness; if this is nil, it uses the default global source.
   184  //
   185  // If dist implements Rand(*rand.Rand) float64, Rand returns that
   186  // method. Otherwise, it returns a generic generator based on dist's
   187  // inverse CDF (which may in turn use an efficient implementation or a
   188  // generic numerical implementation; see InvCDF).
   189  func Rand(dist DistCommon) func(*rand.Rand) float64 {
   190  	type distRand interface {
   191  		Rand(*rand.Rand) float64
   192  	}
   193  	if dist, ok := dist.(distRand); ok {
   194  		return dist.Rand
   195  	}
   196  
   197  	// Otherwise, use a generic algorithm.
   198  	inv := InvCDF(dist)
   199  	return func(r *rand.Rand) float64 {
   200  		var y float64
   201  		for y == 0 {
   202  			if r == nil {
   203  				y = rand.Float64()
   204  			} else {
   205  				y = r.Float64()
   206  			}
   207  		}
   208  		return inv(y)
   209  	}
   210  }