github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/distuv/norm.go

github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/distuv/norm.go (about)

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package distuv
     6  
     7  import (
     8  	"math"
     9  
    10  	"golang.org/x/exp/rand"
    11  
    12  	"github.com/jingcheng-WU/gonum/floats"
    13  	"github.com/jingcheng-WU/gonum/mathext"
    14  	"github.com/jingcheng-WU/gonum/stat"
    15  )
    16  
    17  // UnitNormal is an instantiation of the normal distribution with Mu = 0 and Sigma = 1.
    18  var UnitNormal = Normal{Mu: 0, Sigma: 1}
    19  
    20  // Normal respresents a normal (Gaussian) distribution (https://en.wikipedia.org/wiki/Normal_distribution).
    21  type Normal struct {
    22  	Mu    float64 // Mean of the normal distribution
    23  	Sigma float64 // Standard deviation of the normal distribution
    24  	Src   rand.Source
    25  
    26  	// Needs to be Mu and Sigma and not Mean and StdDev because Normal has functions
    27  	// Mean and StdDev
    28  }
    29  
    30  // CDF computes the value of the cumulative density function at x.
    31  func (n Normal) CDF(x float64) float64 {
    32  	return 0.5 * math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2))
    33  }
    34  
    35  // ConjugateUpdate updates the parameters of the distribution from the sufficient
    36  // statistics of a set of samples. The sufficient statistics, suffStat, have been
    37  // observed with nSamples observations. The prior values of the distribution are those
    38  // currently in the distribution, and have been observed with priorStrength samples.
    39  //
    40  // For the normal distribution, the sufficient statistics are the mean and
    41  // uncorrected standard deviation of the samples.
    42  // The prior is having seen strength[0] samples with mean Normal.Mu
    43  // and strength[1] samples with standard deviation Normal.Sigma. As a result of
    44  // this function, Normal.Mu and Normal.Sigma are updated based on the weighted
    45  // samples, and strength is modified to include the new number of samples observed.
    46  //
    47  // This function panics if len(suffStat) != n.NumSuffStat() or
    48  // len(priorStrength) != n.NumSuffStat().
    49  func (n *Normal) ConjugateUpdate(suffStat []float64, nSamples float64, priorStrength []float64) {
    50  	// TODO: Support prior strength with math.Inf(1) to allow updating with
    51  	// a known mean/standard deviation
    52  	if len(suffStat) != n.NumSuffStat() {
    53  		panic("norm: incorrect suffStat length")
    54  	}
    55  	if len(priorStrength) != n.NumSuffStat() {
    56  		panic("norm: incorrect priorStrength length")
    57  	}
    58  
    59  	totalMeanSamples := nSamples + priorStrength[0]
    60  	totalSum := suffStat[0]*nSamples + n.Mu*priorStrength[0]
    61  
    62  	totalVarianceSamples := nSamples + priorStrength[1]
    63  	// sample variance
    64  	totalVariance := nSamples * suffStat[1] * suffStat[1]
    65  	// add prior variance
    66  	totalVariance += priorStrength[1] * n.Sigma * n.Sigma
    67  	// add cross variance from the difference of the means
    68  	meanDiff := (suffStat[0] - n.Mu)
    69  	totalVariance += priorStrength[0] * nSamples * meanDiff * meanDiff / totalMeanSamples
    70  
    71  	n.Mu = totalSum / totalMeanSamples
    72  	n.Sigma = math.Sqrt(totalVariance / totalVarianceSamples)
    73  	floats.AddConst(nSamples, priorStrength)
    74  }
    75  
    76  // Entropy returns the differential entropy of the distribution.
    77  func (n Normal) Entropy() float64 {
    78  	return 0.5 * (log2Pi + 1 + 2*math.Log(n.Sigma))
    79  }
    80  
    81  // ExKurtosis returns the excess kurtosis of the distribution.
    82  func (Normal) ExKurtosis() float64 {
    83  	return 0
    84  }
    85  
    86  // Fit sets the parameters of the probability distribution from the
    87  // data samples x with relative weights w. If weights is nil, then all the weights
    88  // are 1. If weights is not nil, then the len(weights) must equal len(samples).
    89  func (n *Normal) Fit(samples, weights []float64) {
    90  	suffStat := make([]float64, n.NumSuffStat())
    91  	nSamples := n.SuffStat(suffStat, samples, weights)
    92  	n.ConjugateUpdate(suffStat, nSamples, make([]float64, n.NumSuffStat()))
    93  }
    94  
    95  // LogProb computes the natural logarithm of the value of the probability density function at x.
    96  func (n Normal) LogProb(x float64) float64 {
    97  	return negLogRoot2Pi - math.Log(n.Sigma) - (x-n.Mu)*(x-n.Mu)/(2*n.Sigma*n.Sigma)
    98  }
    99  
   100  // Mean returns the mean of the probability distribution.
   101  func (n Normal) Mean() float64 {
   102  	return n.Mu
   103  }
   104  
   105  // Median returns the median of the normal distribution.
   106  func (n Normal) Median() float64 {
   107  	return n.Mu
   108  }
   109  
   110  // Mode returns the mode of the normal distribution.
   111  func (n Normal) Mode() float64 {
   112  	return n.Mu
   113  }
   114  
   115  // NumParameters returns the number of parameters in the distribution.
   116  func (Normal) NumParameters() int {
   117  	return 2
   118  }
   119  
   120  // NumSuffStat returns the number of sufficient statistics for the distribution.
   121  func (Normal) NumSuffStat() int {
   122  	return 2
   123  }
   124  
   125  // Prob computes the value of the probability density function at x.
   126  func (n Normal) Prob(x float64) float64 {
   127  	return math.Exp(n.LogProb(x))
   128  }
   129  
   130  // Quantile returns the inverse of the cumulative probability distribution.
   131  func (n Normal) Quantile(p float64) float64 {
   132  	if p < 0 || p > 1 {
   133  		panic(badPercentile)
   134  	}
   135  	return n.Mu + n.Sigma*mathext.NormalQuantile(p)
   136  }
   137  
   138  // Rand returns a random sample drawn from the distribution.
   139  func (n Normal) Rand() float64 {
   140  	var rnd float64
   141  	if n.Src == nil {
   142  		rnd = rand.NormFloat64()
   143  	} else {
   144  		rnd = rand.New(n.Src).NormFloat64()
   145  	}
   146  	return rnd*n.Sigma + n.Mu
   147  }
   148  
   149  // Score returns the score function with respect to the parameters of the
   150  // distribution at the input location x. The score function is the derivative
   151  // of the log-likelihood at x with respect to the parameters
   152  //  (∂/∂θ) log(p(x;θ))
   153  // If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
   154  // Score will panic, and the derivative is stored in-place into deriv. If deriv
   155  // is nil a new slice will be allocated and returned.
   156  //
   157  // The order is [∂LogProb / ∂Mu, ∂LogProb / ∂Sigma].
   158  //
   159  // For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
   160  func (n Normal) Score(deriv []float64, x float64) []float64 {
   161  	if deriv == nil {
   162  		deriv = make([]float64, n.NumParameters())
   163  	}
   164  	if len(deriv) != n.NumParameters() {
   165  		panic(badLength)
   166  	}
   167  	deriv[0] = (x - n.Mu) / (n.Sigma * n.Sigma)
   168  	deriv[1] = 1 / n.Sigma * (-1 + ((x-n.Mu)/n.Sigma)*((x-n.Mu)/n.Sigma))
   169  	return deriv
   170  }
   171  
   172  // ScoreInput returns the score function with respect to the input of the
   173  // distribution at the input location specified by x. The score function is the
   174  // derivative of the log-likelihood
   175  //  (d/dx) log(p(x)) .
   176  func (n Normal) ScoreInput(x float64) float64 {
   177  	return -(1 / (2 * n.Sigma * n.Sigma)) * 2 * (x - n.Mu)
   178  }
   179  
   180  // Skewness returns the skewness of the distribution.
   181  func (Normal) Skewness() float64 {
   182  	return 0
   183  }
   184  
   185  // StdDev returns the standard deviation of the probability distribution.
   186  func (n Normal) StdDev() float64 {
   187  	return n.Sigma
   188  }
   189  
   190  // SuffStat computes the sufficient statistics of a set of samples to update
   191  // the distribution. The sufficient statistics are stored in place, and the
   192  // effective number of samples are returned.
   193  //
   194  // The normal distribution has two sufficient statistics, the mean of the samples
   195  // and the standard deviation of the samples.
   196  //
   197  // If weights is nil, the weights are assumed to be 1, otherwise panics if
   198  // len(samples) != len(weights). Panics if len(suffStat) != NumSuffStat().
   199  func (Normal) SuffStat(suffStat, samples, weights []float64) (nSamples float64) {
   200  	lenSamp := len(samples)
   201  	if len(weights) != 0 && len(samples) != len(weights) {
   202  		panic(badLength)
   203  	}
   204  	if len(suffStat) != (Normal{}).NumSuffStat() {
   205  		panic(badSuffStat)
   206  	}
   207  
   208  	if len(weights) == 0 {
   209  		nSamples = float64(lenSamp)
   210  	} else {
   211  		nSamples = floats.Sum(weights)
   212  	}
   213  
   214  	mean := stat.Mean(samples, weights)
   215  	suffStat[0] = mean
   216  
   217  	// Use Moment and not StdDev because we want it to be uncorrected
   218  	variance := stat.MomentAbout(2, samples, mean, weights)
   219  	suffStat[1] = math.Sqrt(variance)
   220  	return nSamples
   221  }
   222  
   223  // Survival returns the survival function (complementary CDF) at x.
   224  func (n Normal) Survival(x float64) float64 {
   225  	return 0.5 * (1 - math.Erf((x-n.Mu)/(n.Sigma*math.Sqrt2)))
   226  }
   227  
   228  // setParameters modifies the parameters of the distribution.
   229  func (n *Normal) setParameters(p []Parameter) {
   230  	if len(p) != n.NumParameters() {
   231  		panic("normal: incorrect number of parameters to set")
   232  	}
   233  	if p[0].Name != "Mu" {
   234  		panic("normal: " + panicNameMismatch)
   235  	}
   236  	if p[1].Name != "Sigma" {
   237  		panic("normal: " + panicNameMismatch)
   238  	}
   239  	n.Mu = p[0].Value
   240  	n.Sigma = p[1].Value
   241  }
   242  
   243  // Variance returns the variance of the probability distribution.
   244  func (n Normal) Variance() float64 {
   245  	return n.Sigma * n.Sigma
   246  }
   247  
   248  // parameters returns the parameters of the distribution.
   249  func (n Normal) parameters(p []Parameter) []Parameter {
   250  	nParam := n.NumParameters()
   251  	if p == nil {
   252  		p = make([]Parameter, nParam)
   253  	} else if len(p) != nParam {
   254  		panic("normal: improper parameter length")
   255  	}
   256  	p[0].Name = "Mu"
   257  	p[0].Value = n.Mu
   258  	p[1].Name = "Sigma"
   259  	p[1].Value = n.Sigma
   260  	return p
   261  }