github.com/gopherd/gonum@v0.0.4/stat/stat.go

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package stat
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  
    11  	"github.com/gopherd/gonum/floats"
    12  )
    13  
    14  // CumulantKind specifies the behavior for calculating the empirical CDF or Quantile.
    15  type CumulantKind int
    16  
    17  // List of supported CumulantKind values for the Quantile function.
    18  // Constant values should match the R nomenclature. See
    19  // https://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
    20  const (
    21  	// Empirical treats the distribution as the actual empirical distribution.
    22  	Empirical CumulantKind = 1
    23  	// LinInterp linearly interpolates the empirical distribution between sample values, with a flat extrapolation.
    24  	LinInterp CumulantKind = 4
    25  )
    26  
    27  // bhattacharyyaCoeff computes the Bhattacharyya coefficient of the probability distributions p and q:
    28  //  \sum_i \sqrt{p_i q_i}
    29  //
    30  // It is assumed that p and q have equal length.
    31  func bhattacharyyaCoeff(p, q []float64) float64 {
    32  	var bc float64
    33  	for i, a := range p {
    34  		bc += math.Sqrt(a * q[i])
    35  	}
    36  	return bc
    37  }
    38  
    39  // Bhattacharyya computes the distance between the probability distributions p and q given by:
    40  //  -\ln ( \sum_i \sqrt{p_i q_i} )
    41  //
    42  // The lengths of p and q must be equal. It is assumed that p and q sum to 1.
    43  func Bhattacharyya(p, q []float64) float64 {
    44  	if len(p) != len(q) {
    45  		panic("stat: slice length mismatch")
    46  	}
    47  	bc := bhattacharyyaCoeff(p, q)
    48  	return -math.Log(bc)
    49  }
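
        // A usage sketch with hypothetical data; p and q are equal-length
        // distributions that each sum to 1:
        //  p := []float64{0.5, 0.5}
        //  q := []float64{0.1, 0.9}
        //  d := Bhattacharyya(p, q) // -ln(sqrt(0.05)+sqrt(0.45)) ≈ 0.112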
    50  
    51  // CDF returns the empirical cumulative distribution function value at q, that
    52  // is, the fraction of the samples less than or equal to q. The
    53  // exact behavior is determined by the CumulantKind. CDF is theoretically
    54  // the inverse of the Quantile function, though it may not be the actual inverse
    55  // for all values q and CumulantKinds.
    56  //
    57  // The x data must be sorted in increasing order. If weights is nil then all
    58  // of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
    59  // CDF will panic if the length of x is zero.
    60  //
    61  // CumulantKind behaviors:
    62  //  - Empirical: Returns the fraction of the total sample weight belonging to
    63  //  samples less than or equal to q
    64  func CDF(q float64, c CumulantKind, x, weights []float64) float64 {
    65  	if weights != nil && len(x) != len(weights) {
    66  		panic("stat: slice length mismatch")
    67  	}
    68  	if floats.HasNaN(x) {
    69  		return math.NaN()
    70  	}
    71  	if len(x) == 0 {
    72  		panic("stat: zero length slice")
    73  	}
    74  	if !sort.Float64sAreSorted(x) {
    75  		panic("x data are not sorted")
    76  	}
    77  
    78  	if q < x[0] {
    79  		return 0
    80  	}
    81  	if q >= x[len(x)-1] {
    82  		return 1
    83  	}
    84  
    85  	var sumWeights float64
    86  	if weights == nil {
    87  		sumWeights = float64(len(x))
    88  	} else {
    89  		sumWeights = floats.Sum(weights)
    90  	}
    91  
    92  	// Calculate the index
    93  	switch c {
    94  	case Empirical:
    95  		// Accumulate the weight of samples less than or equal to q.
    96  		var w float64
    97  		for i, v := range x {
    98  			if v > q {
    99  				return w / sumWeights
   100  			}
   101  			if weights == nil {
   102  				w++
   103  			} else {
   104  				w += weights[i]
   105  			}
   106  		}
   107  		panic("impossible")
   108  	default:
   109  		panic("stat: bad cumulant kind")
   110  	}
   111  }
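
        // A usage sketch with hypothetical data (x must be sorted; nil weights
        // give each sample unit weight):
        //  x := []float64{1, 2, 3, 4, 5}
        //  f := CDF(3, Empirical, x, nil) // 3 of 5 samples are <= 3, so f == 0.6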
   112  
   113  // ChiSquare computes the chi-square distance between the observed frequencies 'obs' and
   114  // expected frequencies 'exp' given by:
   115  //  \sum_i (obs_i-exp_i)^2 / exp_i
   116  //
   117  // The lengths of obs and exp must be equal.
   118  func ChiSquare(obs, exp []float64) float64 {
   119  	if len(obs) != len(exp) {
   120  		panic("stat: slice length mismatch")
   121  	}
   122  	var result float64
   123  	for i, a := range obs {
   124  		b := exp[i]
   125  		if a == 0 && b == 0 {
   126  			continue
   127  		}
   128  		result += (a - b) * (a - b) / b
   129  	}
   130  	return result
   131  }
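
        // A usage sketch with hypothetical observed and expected frequencies:
        //  obs := []float64{16, 18, 16, 14, 12, 12}
        //  exp := []float64{16, 16, 16, 16, 16, 8}
        //  d := ChiSquare(obs, exp) // 0 + 0.25 + 0 + 0.25 + 1 + 2 == 3.5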
   132  
   133  // CircularMean returns the circular mean of the dataset.
   134  //	atan2(\sum_i w_i * sin(alpha_i), \sum_i w_i * cos(alpha_i))
   135  // If weights is nil then all of the weights are 1. If weights is not nil, then
   136  // len(x) must equal len(weights).
   137  func CircularMean(x, weights []float64) float64 {
   138  	if weights != nil && len(x) != len(weights) {
   139  		panic("stat: slice length mismatch")
   140  	}
   141  
   142  	var aX, aY float64
   143  	if weights != nil {
   144  		for i, v := range x {
   145  			aX += weights[i] * math.Cos(v)
   146  			aY += weights[i] * math.Sin(v)
   147  		}
   148  	} else {
   149  		for _, v := range x {
   150  			aX += math.Cos(v)
   151  			aY += math.Sin(v)
   152  		}
   153  	}
   154  
   155  	return math.Atan2(aY, aX)
   156  }
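
        // A sketch with hypothetical angles showing why the circular mean differs
        // from the arithmetic mean near the ±π wrap:
        //  alpha := []float64{math.Pi - 0.1, -math.Pi + 0.1}
        //  m := CircularMean(alpha, nil) // ≈ ±π, whereas the arithmetic mean is 0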
   157  
   158  // Correlation returns the weighted correlation between the samples of x and y
   159  // with the given means.
   160  //  sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (stdX * stdY)
   161  // The lengths of x and y must be equal. If weights is nil then all of the
   162  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   163  func Correlation(x, y, weights []float64) float64 {
   164  	// This is a two-pass corrected implementation. It is an adaptation of the
   165  	// algorithm used in the MeanVariance function, which applies a correction
   166  	// to the typical two pass approach.
   167  
   168  	if len(x) != len(y) {
   169  		panic("stat: slice length mismatch")
   170  	}
   171  	xu := Mean(x, weights)
   172  	yu := Mean(y, weights)
   173  	var (
   174  		sxx           float64
   175  		syy           float64
   176  		sxy           float64
   177  		xcompensation float64
   178  		ycompensation float64
   179  	)
   180  	if weights == nil {
   181  		for i, xv := range x {
   182  			yv := y[i]
   183  			xd := xv - xu
   184  			yd := yv - yu
   185  			sxx += xd * xd
   186  			syy += yd * yd
   187  			sxy += xd * yd
   188  			xcompensation += xd
   189  			ycompensation += yd
   190  		}
   191  		// xcompensation and ycompensation are from Chan, et al.
   192  		// referenced in the MeanVariance function. They are analogous
   193  		// to the second term in (1.7) in that paper.
   194  		sxx -= xcompensation * xcompensation / float64(len(x))
   195  		syy -= ycompensation * ycompensation / float64(len(x))
   196  
   197  		return (sxy - xcompensation*ycompensation/float64(len(x))) / math.Sqrt(sxx*syy)
   198  
   199  	}
   200  
   201  	var sumWeights float64
   202  	for i, xv := range x {
   203  		w := weights[i]
   204  		yv := y[i]
   205  		xd := xv - xu
   206  		wxd := w * xd
   207  		yd := yv - yu
   208  		wyd := w * yd
   209  		sxx += wxd * xd
   210  		syy += wyd * yd
   211  		sxy += wxd * yd
   212  		xcompensation += wxd
   213  		ycompensation += wyd
   214  		sumWeights += w
   215  	}
   216  	// xcompensation and ycompensation are from Chan, et al.
   217  	// referenced in the MeanVariance function. They are analogous
   218  	// to the second term in (1.7) in that paper, except they use
   219  	// the sumWeights instead of the sample count.
   220  	sxx -= xcompensation * xcompensation / sumWeights
   221  	syy -= ycompensation * ycompensation / sumWeights
   222  
   223  	return (sxy - xcompensation*ycompensation/sumWeights) / math.Sqrt(sxx*syy)
   224  }
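
        // A usage sketch with hypothetical data; y is an exact linear function of
        // x, so the correlation is 1 up to floating-point error:
        //  x := []float64{1, 2, 3, 4}
        //  y := []float64{2, 4, 6, 8}
        //  r := Correlation(x, y, nil) // 1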
   225  
   226  // Kendall returns the weighted Tau-a Kendall correlation between the
   227  // samples of x and y. The Kendall correlation measures the difference between
   228  // the number of concordant and discordant pairs. If weights are specified then
   229  // each pair is weighted by weights[i] * weights[j] and the final sum is
   230  // normalized to stay between -1 and 1.
   231  // The lengths of x and y must be equal. If weights is nil then all of the
   232  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   233  func Kendall(x, y, weights []float64) float64 {
   234  	if len(x) != len(y) {
   235  		panic("stat: slice length mismatch")
   236  	}
   237  
   238  	var (
   239  		cc float64 // number of concordant pairs
   240  		dc float64 // number of discordant pairs
   241  		n  = len(x)
   242  	)
   243  
   244  	if weights == nil {
   245  		for i := 0; i < n; i++ {
   246  			for j := i; j < n; j++ {
   247  				if i == j {
   248  					continue
   249  				}
   250  				if math.Signbit(x[j]-x[i]) == math.Signbit(y[j]-y[i]) {
   251  					cc++
   252  				} else {
   253  					dc++
   254  				}
   255  			}
   256  		}
   257  		return (cc - dc) / float64(n*(n-1)/2)
   258  	}
   259  
   260  	var sumWeights float64
   261  
   262  	for i := 0; i < n; i++ {
   263  		for j := i; j < n; j++ {
   264  			if i == j {
   265  				continue
   266  			}
   267  			weight := weights[i] * weights[j]
   268  			if math.Signbit(x[j]-x[i]) == math.Signbit(y[j]-y[i]) {
   269  				cc += weight
   270  			} else {
   271  				dc += weight
   272  			}
   273  			sumWeights += weight
   274  		}
   275  	}
   276  	return (cc - dc) / sumWeights
   277  }
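
        // A usage sketch with hypothetical data; x and y always move in the same
        // direction, so all 10 pairs are concordant:
        //  x := []float64{1, 2, 3, 4, 5}
        //  y := []float64{2, 3, 5, 7, 11}
        //  tau := Kendall(x, y, nil) // (10 - 0) / 10 == 1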
   278  
   279  // Covariance returns the weighted covariance between the samples of x and y.
   280  //  sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
   281  // The lengths of x and y must be equal. If weights is nil then all of the
   282  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   283  func Covariance(x, y, weights []float64) float64 {
   284  	// This is a two-pass corrected implementation. It is an adaptation of the
   285  	// algorithm used in the MeanVariance function, which applies a correction
   286  	// to the typical two pass approach.
   287  
   288  	if len(x) != len(y) {
   289  		panic("stat: slice length mismatch")
   290  	}
   291  	xu := Mean(x, weights)
   292  	yu := Mean(y, weights)
   293  	return covarianceMeans(x, y, weights, xu, yu)
   294  }
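
        // A usage sketch with hypothetical data; since y = 2x, the sample
        // covariance is twice the sample variance of x:
        //  x := []float64{1, 2, 3, 4}
        //  y := []float64{2, 4, 6, 8}
        //  c := Covariance(x, y, nil) // 10/3 ≈ 3.333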
   295  
   296  // covarianceMeans returns the weighted covariance between x and y with the mean
   297  // of x and y already specified. See the documentation of Covariance for more
   298  // information.
   299  func covarianceMeans(x, y, weights []float64, xu, yu float64) float64 {
   300  	var (
   301  		ss            float64
   302  		xcompensation float64
   303  		ycompensation float64
   304  	)
   305  	if weights == nil {
   306  		for i, xv := range x {
   307  			yv := y[i]
   308  			xd := xv - xu
   309  			yd := yv - yu
   310  			ss += xd * yd
   311  			xcompensation += xd
   312  			ycompensation += yd
   313  		}
   314  		// xcompensation and ycompensation are from Chan, et al.
   315  		// referenced in the MeanVariance function. They are analogous
   316  		// to the second term in (1.7) in that paper.
   317  		return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1)
   318  	}
   319  
   320  	var sumWeights float64
   321  
   322  	for i, xv := range x {
   323  		w := weights[i]
   324  		yv := y[i]
   325  		wxd := w * (xv - xu)
   326  		yd := (yv - yu)
   327  		ss += wxd * yd
   328  		xcompensation += wxd
   329  		ycompensation += w * yd
   330  		sumWeights += w
   331  	}
   332  	// xcompensation and ycompensation are from Chan, et al.
   333  	// referenced in the MeanVariance function. They are analogous
   334  	// to the second term in (1.7) in that paper, except they use
   335  	// the sumWeights instead of the sample count.
   336  	return (ss - xcompensation*ycompensation/sumWeights) / (sumWeights - 1)
   337  }
   338  
   339  // CrossEntropy computes the cross-entropy between the two distributions specified
   340  // in p and q.
   341  func CrossEntropy(p, q []float64) float64 {
   342  	if len(p) != len(q) {
   343  		panic("stat: slice length mismatch")
   344  	}
   345  	var ce float64
   346  	for i, v := range p {
   347  		if v != 0 {
   348  			ce -= v * math.Log(q[i])
   349  		}
   350  	}
   351  	return ce
   352  }
   353  
   354  // Entropy computes the Shannon entropy of the distribution p.
   355  // The natural logarithm is used.
   356  //  - sum_i (p_i * log_e(p_i))
   357  func Entropy(p []float64) float64 {
   358  	var e float64
   359  	for _, v := range p {
   360  		if v != 0 { // Entropy needs 0 * log(0) == 0.
   361  			e -= v * math.Log(v)
   362  		}
   363  	}
   364  	return e
   365  }
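
        // A usage sketch: a uniform distribution over n outcomes attains the
        // maximum entropy ln(n). With hypothetical data,
        //  p := []float64{0.25, 0.25, 0.25, 0.25}
        //  h := Entropy(p) // ln(4) ≈ 1.386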
   366  
   367  // ExKurtosis returns the population excess kurtosis of the sample.
   368  // The kurtosis is defined as the 4th moment about the mean divided by the squared
   369  // variance. The excess kurtosis subtracts 3.0 so that the excess kurtosis of
   370  // the normal distribution is zero.
   371  // If weights is nil then all of the weights are 1. If weights is not nil, then
   372  // len(x) must equal len(weights).
   373  func ExKurtosis(x, weights []float64) float64 {
   374  	mean, std := MeanStdDev(x, weights)
   375  	if weights == nil {
   376  		var e float64
   377  		for _, v := range x {
   378  			z := (v - mean) / std
   379  			e += z * z * z * z
   380  		}
   381  		mul, offset := kurtosisCorrection(float64(len(x)))
   382  		return e*mul - offset
   383  	}
   384  
   385  	var (
   386  		e          float64
   387  		sumWeights float64
   388  	)
   389  	for i, v := range x {
   390  		z := (v - mean) / std
   391  		e += weights[i] * z * z * z * z
   392  		sumWeights += weights[i]
   393  	}
   394  	mul, offset := kurtosisCorrection(sumWeights)
   395  	return e*mul - offset
   396  }
   397  
   398  // kurtosisCorrection returns the sample-size correction terms, where n is the
   399  // number of samples. See https://en.wikipedia.org/wiki/Kurtosis.
   400  func kurtosisCorrection(n float64) (mul, offset float64) {
   401  	return ((n + 1) / (n - 1)) * (n / (n - 2)) * (1 / (n - 3)), 3 * ((n - 1) / (n - 2)) * ((n - 1) / (n - 3))
   402  }
   403  
   404  // GeometricMean returns the weighted geometric mean of the dataset
   405  //  \prod_i {x_i ^ w_i}
   406  // This only applies with positive x and positive weights. If weights is nil
   407  // then all of the weights are 1. If weights is not nil, then len(x) must equal
   408  // len(weights).
   409  func GeometricMean(x, weights []float64) float64 {
   410  	if weights == nil {
   411  		var s float64
   412  		for _, v := range x {
   413  			s += math.Log(v)
   414  		}
   415  		s /= float64(len(x))
   416  		return math.Exp(s)
   417  	}
   418  	if len(x) != len(weights) {
   419  		panic("stat: slice length mismatch")
   420  	}
   421  	var (
   422  		s          float64
   423  		sumWeights float64
   424  	)
   425  	for i, v := range x {
   426  		s += weights[i] * math.Log(v)
   427  		sumWeights += weights[i]
   428  	}
   429  	s /= sumWeights
   430  	return math.Exp(s)
   431  }
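
        // A usage sketch with hypothetical positive data:
        //  x := []float64{2, 8}
        //  g := GeometricMean(x, nil) // exp((ln 2 + ln 8)/2) == 4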
   432  
   433  // HarmonicMean returns the weighted harmonic mean of the dataset
   434  //  \sum_i {w_i} / ( sum_i {w_i / x_i} )
   435  // This only applies with positive x and positive weights.
   436  // If weights is nil then all of the weights are 1. If weights is not nil, then
   437  // len(x) must equal len(weights).
   438  func HarmonicMean(x, weights []float64) float64 {
   439  	if weights != nil && len(x) != len(weights) {
   440  		panic("stat: slice length mismatch")
   441  	}
   442  	// TODO(btracey): Fix this to make it more efficient and avoid allocation.
   443  
   444  	// This can be numerically unstable (for example if x is very small).
   445  	// W = \sum_i {w_i}
   446  	// hm = exp(log(W) - log(\sum_i w_i / x_i))
   447  
   448  	logs := make([]float64, len(x))
   449  	var W float64
   450  	for i := range x {
   451  		if weights == nil {
   452  			logs[i] = -math.Log(x[i])
   453  			W++
   454  			continue
   455  		}
   456  		logs[i] = math.Log(weights[i]) - math.Log(x[i])
   457  		W += weights[i]
   458  	}
   459  
   460  	// Sum all of the logs
   461  	v := floats.LogSumExp(logs) // This computes log(\sum_i { w_i / x_i}).
   462  	return math.Exp(math.Log(W) - v)
   463  }
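
        // A usage sketch with hypothetical positive data:
        //  x := []float64{40, 60}
        //  h := HarmonicMean(x, nil) // 2 / (1/40 + 1/60) == 48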
   464  
   465  // Hellinger computes the distance between the probability distributions p and q given by:
   466  //  \sqrt{ 1 - \sum_i \sqrt{p_i q_i} }
   467  //
   468  // The lengths of p and q must be equal. It is assumed that p and q sum to 1.
   469  func Hellinger(p, q []float64) float64 {
   470  	if len(p) != len(q) {
   471  		panic("stat: slice length mismatch")
   472  	}
   473  	bc := bhattacharyyaCoeff(p, q)
   474  	return math.Sqrt(1 - bc)
   475  }
   476  
   477  // Histogram sums up the weighted number of data points in each bin.
   478  // The weight of data point x[i] will be placed into count[j] if
   479  // dividers[j] <= x[i] < dividers[j+1]. The Span function in the floats package
   480  // can assist with bin creation.
   481  //
   482  // The following conditions on the inputs apply:
   483  //  - The count variable must either be nil or have length of one less than dividers.
   484  //  - The values in dividers must be sorted (use the sort package).
   485  //  - The x values must be sorted.
   486  //  - If weights is nil then all of the weights are 1.
   487  //  - If weights is not nil, then len(x) must equal len(weights).
   488  func Histogram(count, dividers, x, weights []float64) []float64 {
   489  	if weights != nil && len(x) != len(weights) {
   490  		panic("stat: slice length mismatch")
   491  	}
   492  	if count == nil {
   493  		count = make([]float64, len(dividers)-1)
   494  	}
   495  	if len(dividers) < 2 {
   496  		panic("histogram: fewer than two dividers")
   497  	}
   498  	if len(count) != len(dividers)-1 {
   499  		panic("histogram: bin count mismatch")
   500  	}
   501  	if !sort.Float64sAreSorted(dividers) {
   502  		panic("histogram: dividers are not sorted")
   503  	}
   504  	if !sort.Float64sAreSorted(x) {
   505  		panic("histogram: x data are not sorted")
   506  	}
   507  	for i := range count {
   508  		count[i] = 0
   509  	}
   510  	if len(x) == 0 {
   511  		return count
   512  	}
   513  	if x[0] < dividers[0] {
   514  		panic("histogram: minimum x value is less than lowest divider")
   515  	}
   516  	if dividers[len(dividers)-1] <= x[len(x)-1] {
   517  		panic("histogram: maximum x value is greater than or equal to highest divider")
   518  	}
   519  
   520  	idx := 0
   521  	comp := dividers[idx+1]
   522  	if weights == nil {
   523  		for _, v := range x {
   524  			if v < comp {
   525  				// Still in the current bucket.
   526  				count[idx]++
   527  				continue
   528  			}
   529  			// Find the next divider where v is less than the divider.
   530  			for j := idx + 1; j < len(count); j++ {
   531  				if v < dividers[j+1] {
   532  					idx = j
   533  					comp = dividers[j+1]
   534  					break
   535  				}
   536  			}
   537  			count[idx]++
   538  		}
   539  		return count
   540  	}
   541  
   542  	for i, v := range x {
   543  		if v < comp {
   544  			// Still in the current bucket.
   545  			count[idx] += weights[i]
   546  			continue
   547  		}
   548  		// Need to find the next divider where v is less than the divider.
   549  		for j := idx + 1; j < len(count); j++ {
   550  			if v < dividers[j+1] {
   551  				idx = j
   552  				comp = dividers[j+1]
   553  				break
   554  			}
   555  		}
   556  		count[idx] += weights[i]
   557  	}
   558  	return count
   559  }
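
        // A usage sketch with hypothetical sorted data; four dividers define
        // three bins, and a nil count slice is allocated automatically:
        //  x := []float64{0.1, 0.5, 0.9, 1.5, 2.5}
        //  dividers := []float64{0, 1, 2, 3}
        //  counts := Histogram(nil, dividers, x, nil) // [3 1 1]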
   560  
   561  // JensenShannon computes the Jensen-Shannon divergence between the distributions
   562  // p and q. The Jensen-Shannon divergence is defined as
   563  //  m = 0.5 * (p + q)
   564  //  JS(p, q) = 0.5 ( KL(p, m) + KL(q, m) )
   565  // Unlike the Kullback-Leibler divergence, the Jensen-Shannon divergence is
   566  // symmetric. The value is between 0 and ln(2).
   567  func JensenShannon(p, q []float64) float64 {
   568  	if len(p) != len(q) {
   569  		panic("stat: slice length mismatch")
   570  	}
   571  	var js float64
   572  	for i, v := range p {
   573  		qi := q[i]
   574  		m := 0.5 * (v + qi)
   575  		if v != 0 {
   576  			// add kl from p to m
   577  			js += 0.5 * v * (math.Log(v) - math.Log(m))
   578  		}
   579  		if qi != 0 {
   580  			// add kl from q to m
   581  			js += 0.5 * qi * (math.Log(qi) - math.Log(m))
   582  		}
   583  	}
   584  	return js
   585  }
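
        // A usage sketch: two distributions with disjoint support attain the
        // maximum divergence. With hypothetical data,
        //  p := []float64{1, 0}
        //  q := []float64{0, 1}
        //  js := JensenShannon(p, q) // ln(2) ≈ 0.693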
   586  
   587  // KolmogorovSmirnov computes the largest distance between two empirical CDFs.
   588  // Each dataset x and y consists of sample locations and counts, xWeights and
   589  // yWeights, respectively.
   590  //
   591  // x and y may have different lengths, though len(x) must equal len(xWeights), and
   592  // len(y) must equal len(yWeights). Both x and y must be sorted.
   593  //
   594  // Special cases are:
   595  //  = 0 if len(x) == len(y) == 0
   596  //  = 1 if len(x) == 0 and len(y) != 0, or len(x) != 0 and len(y) == 0
   597  func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 {
   598  	if xWeights != nil && len(x) != len(xWeights) {
   599  		panic("stat: slice length mismatch")
   600  	}
   601  	if yWeights != nil && len(y) != len(yWeights) {
   602  		panic("stat: slice length mismatch")
   603  	}
   604  	if len(x) == 0 || len(y) == 0 {
   605  		if len(x) == 0 && len(y) == 0 {
   606  			return 0
   607  		}
   608  		return 1
   609  	}
   610  
   611  	if floats.HasNaN(x) {
   612  		return math.NaN()
   613  	}
   614  	if floats.HasNaN(y) {
   615  		return math.NaN()
   616  	}
   617  
   618  	if !sort.Float64sAreSorted(x) {
   619  		panic("x data are not sorted")
   620  	}
   621  	if !sort.Float64sAreSorted(y) {
   622  		panic("y data are not sorted")
   623  	}
   624  
   625  	xWeightsNil := xWeights == nil
   626  	yWeightsNil := yWeights == nil
   627  
   628  	var (
   629  		maxDist    float64
   630  		xSum, ySum float64
   631  		xCdf, yCdf float64
   632  		xIdx, yIdx int
   633  	)
   634  
   635  	if xWeightsNil {
   636  		xSum = float64(len(x))
   637  	} else {
   638  		xSum = floats.Sum(xWeights)
   639  	}
   640  
   641  	if yWeightsNil {
   642  		ySum = float64(len(y))
   643  	} else {
   644  		ySum = floats.Sum(yWeights)
   645  	}
   646  
   647  	xVal := x[0]
   648  	yVal := y[0]
   649  
   650  	// Algorithm description:
   651  	// The goal is to find the maximum difference in the empirical CDFs for the
   652  	// two datasets. The CDFs are piecewise-constant, and thus the distance
   653  	// between the CDFs will only change at the values themselves.
   654  	//
   655  	// To find the maximum distance, step through the data in ascending order
   656  	// of value between the two datasets. At each step, compute the empirical CDF
   657  	// and compare the local distance with the maximum distance.
   658  	// Due to some corner cases, equal data entries must be tallied simultaneously.
   659  	for {
   660  		switch {
   661  		case xVal < yVal:
   662  			xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
   663  		case yVal < xVal:
   664  			yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
   665  		case xVal == yVal:
   666  			newX := x[xIdx]
   667  			newY := y[yIdx]
   668  			if newX < newY {
   669  				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
   670  			} else if newY < newX {
   671  				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
   672  			} else {
   673  				// Update them both, they'll be equal next time and the right
   674  				// thing will happen.
   675  				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
   676  				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
   677  			}
   678  		default:
   679  			panic("unreachable")
   680  		}
   681  
   682  		dist := math.Abs(xCdf - yCdf)
   683  		if dist > maxDist {
   684  			maxDist = dist
   685  		}
   686  
   687  		// Both xCdf and yCdf will equal 1 at the end, so if we have reached the
   688  		// end of either sample list, the distance is as large as it can be.
   689  		if xIdx == len(x) || yIdx == len(y) {
   690  			return maxDist
   691  		}
   692  	}
   693  }
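
        // A usage sketch with hypothetical sorted samples:
        //  x := []float64{1, 2, 3, 4}
        //  y := []float64{3, 4, 5, 6}
        //  d := KolmogorovSmirnov(x, nil, y, nil) // 0.5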
   694  
   695  // updateKS gets the next data point from one of the data sets. In doing so, it
   696  // combines the weight of all the data points of equal value. Upon return, val is
   697  // the next value in the data set, newCdf is the total combined CDF up until this
   698  // point, and newIdx is the index of the next location in that sample to examine.
   699  func updateKS(idx int, cdf, sum float64, values, weights []float64, isNil bool) (val, newCdf float64, newIdx int) {
   700  	// Sum up all the weights of consecutive values that are equal.
   701  	if isNil {
   702  		newCdf = cdf + 1/sum
   703  	} else {
   704  		newCdf = cdf + weights[idx]/sum
   705  	}
   706  	newIdx = idx + 1
   707  	for {
   708  		if newIdx == len(values) {
   709  			return values[newIdx-1], newCdf, newIdx
   710  		}
   711  		if values[newIdx-1] != values[newIdx] {
   712  			return values[newIdx], newCdf, newIdx
   713  		}
   714  		if isNil {
   715  			newCdf += 1 / sum
   716  		} else {
   717  			newCdf += weights[newIdx] / sum
   718  		}
   719  		newIdx++
   720  	}
   721  }
   722  
   723  // KullbackLeibler computes the Kullback-Leibler distance between the
   724  // distributions p and q. The natural logarithm is used.
   725  //  sum_i(p_i * log(p_i / q_i))
   726  // Note that the Kullback-Leibler distance is not symmetric;
   727  // KullbackLeibler(p,q) != KullbackLeibler(q,p)
   728  func KullbackLeibler(p, q []float64) float64 {
   729  	if len(p) != len(q) {
   730  		panic("stat: slice length mismatch")
   731  	}
   732  	var kl float64
   733  	for i, v := range p {
   734  		if v != 0 { // Entropy needs 0 * log(0) == 0.
   735  			kl += v * (math.Log(v) - math.Log(q[i]))
   736  		}
   737  	}
   738  	return kl
   739  }
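
        // A sketch of the asymmetry, with hypothetical distributions:
        //  p := []float64{0.5, 0.5}
        //  q := []float64{0.9, 0.1}
        //  d1 := KullbackLeibler(p, q) // ≈ 0.511
        //  d2 := KullbackLeibler(q, p) // ≈ 0.368, so d1 != d2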
   740  
   741  // LinearRegression computes the best-fit line
   742  //  y = alpha + beta*x
   743  // to the data in x and y with the given weights. If origin is true, the
   744  // regression is forced to pass through the origin.
   745  //
   746  // Specifically, LinearRegression computes the values of alpha and
   747  // beta such that the total squared residual
   748  //  \sum_i w[i]*(y[i] - alpha - beta*x[i])^2
   749  // is minimized. If origin is true, then alpha is forced to be zero.
   750  //
   751  // The lengths of x and y must be equal. If weights is nil then all of the
   752  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   753  func LinearRegression(x, y, weights []float64, origin bool) (alpha, beta float64) {
   754  	if len(x) != len(y) {
   755  		panic("stat: slice length mismatch")
   756  	}
   757  	if weights != nil && len(weights) != len(x) {
   758  		panic("stat: slice length mismatch")
   759  	}
   760  
   761  	w := 1.0
   762  	if origin {
   763  		var x2Sum, xySum float64
   764  		for i, xi := range x {
   765  			if weights != nil {
   766  				w = weights[i]
   767  			}
   768  			yi := y[i]
   769  			xySum += w * xi * yi
   770  			x2Sum += w * xi * xi
   771  		}
   772  		beta = xySum / x2Sum
   773  
   774  		return 0, beta
   775  	}
   776  
   777  	xu, xv := MeanVariance(x, weights)
   778  	yu := Mean(y, weights)
   779  	cov := covarianceMeans(x, y, weights, xu, yu)
   780  	beta = cov / xv
   781  	alpha = yu - beta*xu
   782  	return alpha, beta
   783  }
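
        // A usage sketch with hypothetical data; the points lie exactly on
        // y = 1 + 2x, so the fit recovers the line:
        //  x := []float64{0, 1, 2, 3}
        //  y := []float64{1, 3, 5, 7}
        //  alpha, beta := LinearRegression(x, y, nil, false) // alpha == 1, beta == 2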
   784  
   785  // RSquared returns the coefficient of determination defined as
   786  //  R^2 = 1 - \sum_i w[i]*(y[i] - alpha - beta*x[i])^2 / \sum_i w[i]*(y[i] - mean(y))^2
   787  // for the line
   788  //  y = alpha + beta*x
   789  // and the data in x and y with the given weights.
   790  //
   791  // The lengths of x and y must be equal. If weights is nil then all of the
   792  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   793  func RSquared(x, y, weights []float64, alpha, beta float64) float64 {
   794  	if len(x) != len(y) {
   795  		panic("stat: slice length mismatch")
   796  	}
   797  	if weights != nil && len(weights) != len(x) {
   798  		panic("stat: slice length mismatch")
   799  	}
   800  
   801  	w := 1.0
   802  	yMean := Mean(y, weights)
   803  	var res, tot, d float64
   804  	for i, xi := range x {
   805  		if weights != nil {
   806  			w = weights[i]
   807  		}
   808  		yi := y[i]
   809  		fi := alpha + beta*xi
   810  		d = yi - fi
   811  		res += w * d * d
   812  		d = yi - yMean
   813  		tot += w * d * d
   814  	}
   815  	return 1 - res/tot
   816  }
   817  
   818  // RSquaredFrom returns the coefficient of determination defined as
   819  //  R^2 = 1 - \sum_i w[i]*(estimate[i] - value[i])^2 / \sum_i w[i]*(value[i] - mean(values))^2
   820  // for the estimates and values with the given weights.
   821  //
   822  // The lengths of estimates and values must be equal. If weights is nil then
   823  // all of the weights are 1. If weights is not nil, then len(values) must
   824  // equal len(weights).
   825  func RSquaredFrom(estimates, values, weights []float64) float64 {
   826  	if len(estimates) != len(values) {
   827  		panic("stat: slice length mismatch")
   828  	}
   829  	if weights != nil && len(weights) != len(values) {
   830  		panic("stat: slice length mismatch")
   831  	}
   832  
   833  	w := 1.0
   834  	mean := Mean(values, weights)
   835  	var res, tot, d float64
   836  	for i, val := range values {
   837  		if weights != nil {
   838  			w = weights[i]
   839  		}
   840  		d = val - estimates[i]
   841  		res += w * d * d
   842  		d = val - mean
   843  		tot += w * d * d
   844  	}
   845  	return 1 - res/tot
   846  }
   847  
   848  // RNoughtSquared returns the coefficient of determination defined as
   849  //  R₀^2 = \sum_i w[i]*(beta*x[i])^2 / \sum_i w[i]*y[i]^2
   850  // for the line
   851  //  y = beta*x
   852  // and the data in x and y with the given weights. RNoughtSquared should
   853  // only be used for best-fit lines regressed through the origin.
   854  //
   855  // The lengths of x and y must be equal. If weights is nil then all of the
   856  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   857  func RNoughtSquared(x, y, weights []float64, beta float64) float64 {
   858  	if len(x) != len(y) {
   859  		panic("stat: slice length mismatch")
   860  	}
   861  	if weights != nil && len(weights) != len(x) {
   862  		panic("stat: slice length mismatch")
   863  	}
   864  
   865  	w := 1.0
   866  	var ssr, tot float64
   867  	for i, xi := range x {
   868  		if weights != nil {
   869  			w = weights[i]
   870  		}
   871  		fi := beta * xi
   872  		ssr += w * fi * fi
   873  		yi := y[i]
   874  		tot += w * yi * yi
   875  	}
   876  	return ssr / tot
   877  }
   878  
   879  // Mean computes the weighted mean of the data set.
   880  //  sum_i {w_i * x_i} / sum_i {w_i}
   881  // If weights is nil then all of the weights are 1. If weights is not nil, then
   882  // len(x) must equal len(weights).
   883  func Mean(x, weights []float64) float64 {
   884  	if weights == nil {
   885  		return floats.Sum(x) / float64(len(x))
   886  	}
   887  	if len(x) != len(weights) {
   888  		panic("stat: slice length mismatch")
   889  	}
   890  	var (
   891  		sumValues  float64
   892  		sumWeights float64
   893  	)
   894  	for i, w := range weights {
   895  		sumValues += w * x[i]
   896  		sumWeights += w
   897  	}
   898  	return sumValues / sumWeights
   899  }
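
        // A usage sketch with hypothetical data; the third sample carries twice
        // the weight of the others:
        //  x := []float64{1, 2, 3}
        //  w := []float64{1, 1, 2}
        //  m := Mean(x, w) // (1 + 2 + 2*3) / 4 == 2.25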
   900  
   901  // Mode returns the most common value in the dataset specified by x and the
   902  // given weights. Strict float64 equality is used when comparing values, so users
   903  // should take caution. If several values are the mode, any of them may be returned.
   904  func Mode(x, weights []float64) (val float64, count float64) {
   905  	if weights != nil && len(x) != len(weights) {
   906  		panic("stat: slice length mismatch")
   907  	}
   908  	if len(x) == 0 {
   909  		return 0, 0
   910  	}
   911  	m := make(map[float64]float64)
   912  	if weights == nil {
   913  		for _, v := range x {
   914  			m[v]++
   915  		}
   916  	} else {
   917  		for i, v := range x {
   918  			m[v] += weights[i]
   919  		}
   920  	}
   921  	var (
   922  		maxCount float64
   923  		max      float64
   924  	)
   925  	for val, count := range m {
   926  		if count > maxCount {
   927  			maxCount = count
   928  			max = val
   929  		}
   930  	}
   931  	return max, maxCount
   932  }
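
        // A usage sketch with hypothetical data:
        //  x := []float64{1, 2, 2, 3}
        //  v, n := Mode(x, nil) // v == 2, n == 2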
   933  
   934  // BivariateMoment computes the weighted mixed moment between the samples x and y.
   935  //  E[(x - μ_x)^r*(y - μ_y)^s]
   936  // No degrees of freedom correction is done.
   937  // The lengths of x and y must be equal. If weights is nil then all of the
   938  // weights are 1. If weights is not nil, then len(x) must equal len(weights).
   939  func BivariateMoment(r, s float64, x, y, weights []float64) float64 {
   940  	meanX := Mean(x, weights)
   941  	meanY := Mean(y, weights)
   942  	if len(x) != len(y) {
   943  		panic("stat: slice length mismatch")
   944  	}
   945  	if weights == nil {
   946  		var m float64
   947  		for i, vx := range x {
   948  			vy := y[i]
   949  			m += math.Pow(vx-meanX, r) * math.Pow(vy-meanY, s)
   950  		}
   951  		return m / float64(len(x))
   952  	}
   953  	if len(weights) != len(x) {
   954  		panic("stat: slice length mismatch")
   955  	}
   956  	var (
   957  		m          float64
   958  		sumWeights float64
   959  	)
   960  	for i, vx := range x {
   961  		vy := y[i]
   962  		w := weights[i]
   963  		m += w * math.Pow(vx-meanX, r) * math.Pow(vy-meanY, s)
   964  		sumWeights += w
   965  	}
   966  	return m / sumWeights
   967  }
   968  
   969  // Moment computes the weighted n^th moment of the samples,
   970  //  E[(x - μ)^N]
   971  // No degrees of freedom correction is done.
   972  // If weights is nil then all of the weights are 1. If weights is not nil, then
   973  // len(x) must equal len(weights).
   974  func Moment(moment float64, x, weights []float64) float64 {
   975  	// This also checks that x and weights have the same length.
   976  	mean := Mean(x, weights)
   977  	if weights == nil {
   978  		var m float64
   979  		for _, v := range x {
   980  			m += math.Pow(v-mean, moment)
   981  		}
   982  		return m / float64(len(x))
   983  	}
   984  	var (
   985  		m          float64
   986  		sumWeights float64
   987  	)
   988  	for i, v := range x {
   989  		w := weights[i]
   990  		m += w * math.Pow(v-mean, moment)
   991  		sumWeights += w
   992  	}
   993  	return m / sumWeights
   994  }
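
        // A usage sketch with hypothetical data; the second central moment is the
        // population variance:
        //  x := []float64{1, 2, 3, 4, 5}
        //  m2 := Moment(2, x, nil) // (4+1+0+1+4) / 5 == 2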
   995  
   996  // MomentAbout computes the weighted n^th moment of the samples about
   997  // the given mean \mu,
   998  //  E[(x - μ)^N]
   999  // No degrees of freedom correction is done.
  1000  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1001  // len(x) must equal len(weights).
  1002  func MomentAbout(moment float64, x []float64, mean float64, weights []float64) float64 {
  1003  	if weights == nil {
  1004  		var m float64
  1005  		for _, v := range x {
  1006  			m += math.Pow(v-mean, moment)
  1007  		}
  1008  		m /= float64(len(x))
  1009  		return m
  1010  	}
  1011  	if len(weights) != len(x) {
  1012  		panic("stat: slice length mismatch")
  1013  	}
  1014  	var (
  1015  		m          float64
  1016  		sumWeights float64
  1017  	)
  1018  	for i, v := range x {
  1019  		m += weights[i] * math.Pow(v-mean, moment)
  1020  		sumWeights += weights[i]
  1021  	}
  1022  	return m / sumWeights
  1023  }
  1024  
  1025  // Quantile returns the sample value q of x such that q is greater than or
  1026  // equal to the fraction p of samples. The exact behavior is determined by the
  1027  // CumulantKind, and p should be a number between 0 and 1. Quantile is theoretically
  1028  // the inverse of the CDF function, though it may not be the actual inverse
  1029  // for all values p and CumulantKinds.
  1030  //
  1031  // The x data must be sorted in increasing order. If weights is nil then all
  1032  // of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
  1033  // Quantile will panic if the length of x is zero.
  1034  //
  1035  // CumulantKind behaviors:
  1036  //  - Empirical: Returns the lowest value q for which q is greater than or equal
  1037  //  to the fraction p of samples
  1038  //  - LinInterp: Returns the linearly interpolated value
  1039  func Quantile(p float64, c CumulantKind, x, weights []float64) float64 {
  1040  	if !(p >= 0 && p <= 1) {
  1041  		panic("stat: percentile out of bounds")
  1042  	}
  1043  
  1044  	if weights != nil && len(x) != len(weights) {
  1045  		panic("stat: slice length mismatch")
  1046  	}
  1047  	if len(x) == 0 {
  1048  		panic("stat: zero length slice")
  1049  	}
  1050  	if floats.HasNaN(x) {
  1051  		return math.NaN() // This is needed because the algorithm breaks otherwise.
  1052  	}
  1053  	if !sort.Float64sAreSorted(x) {
  1054  		panic("x data are not sorted")
  1055  	}
  1056  
  1057  	var sumWeights float64
  1058  	if weights == nil {
  1059  		sumWeights = float64(len(x))
  1060  	} else {
  1061  		sumWeights = floats.Sum(weights)
  1062  	}
  1063  	switch c {
  1064  	case Empirical:
  1065  		return empiricalQuantile(p, x, weights, sumWeights)
  1066  	case LinInterp:
  1067  		return linInterpQuantile(p, x, weights, sumWeights)
  1068  	default:
  1069  		panic("stat: bad cumulant kind")
  1070  	}
  1071  }
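
         // A usage sketch with hypothetical sorted data, contrasting the two
         // CumulantKind behaviors:
         //  x := []float64{1, 2, 3, 4}
         //  qe := Quantile(0.5, Empirical, x, nil)   // 2, the lowest sample with CDF >= 0.5
         //  ql := Quantile(0.625, LinInterp, x, nil) // 2.5, interpolated between 2 and 3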
  1072  
  1073  func empiricalQuantile(p float64, x, weights []float64, sumWeights float64) float64 {
  1074  	var cumsum float64
  1075  	fidx := p * sumWeights
  1076  	for i := range x {
  1077  		if weights == nil {
  1078  			cumsum++
  1079  		} else {
  1080  			cumsum += weights[i]
  1081  		}
  1082  		if cumsum >= fidx {
  1083  			return x[i]
  1084  		}
  1085  	}
  1086  	panic("impossible")
  1087  }
  1088  
  1089  func linInterpQuantile(p float64, x, weights []float64, sumWeights float64) float64 {
  1090  	var cumsum float64
  1091  	fidx := p * sumWeights
  1092  	for i := range x {
  1093  		if weights == nil {
  1094  			cumsum++
  1095  		} else {
  1096  			cumsum += weights[i]
  1097  		}
  1098  		if cumsum >= fidx {
  1099  			if i == 0 {
  1100  				return x[0]
  1101  			}
  1102  			t := cumsum - fidx
  1103  			if weights != nil {
  1104  				t /= weights[i]
  1105  			}
  1106  			return t*x[i-1] + (1-t)*x[i]
  1107  		}
  1108  	}
  1109  	panic("impossible")
  1110  }
  1111  
  1112  // Skew computes the skewness of the sample data.
  1113  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1114  // len(x) must equal len(weights).
  1115  // When weights sum to 1 or less, a biased variance estimator should be used.
  1116  func Skew(x, weights []float64) float64 {
  1118  	mean, std := MeanStdDev(x, weights)
  1119  	if weights == nil {
  1120  		var s float64
  1121  		for _, v := range x {
  1122  			z := (v - mean) / std
  1123  			s += z * z * z
  1124  		}
  1125  		return s * skewCorrection(float64(len(x)))
  1126  	}
  1127  	var (
  1128  		s          float64
  1129  		sumWeights float64
  1130  	)
  1131  	for i, v := range x {
  1132  		z := (v - mean) / std
  1133  		s += weights[i] * z * z * z
  1134  		sumWeights += weights[i]
  1135  	}
  1136  	return s * skewCorrection(sumWeights)
  1137  }
  1138  
  1139  // From: http://www.amstat.org/publications/jse/v19n2/doane.pdf page 7
  1140  func skewCorrection(n float64) float64 {
  1141  	return (n / (n - 1)) * (1 / (n - 2))
  1142  }
  1143  
  1144  // SortWeighted rearranges the data in x along with their corresponding
  1145  // weights so that the x data are sorted. The data is sorted in place.
  1146  // Weights may be nil, but if weights is non-nil then it must have the same
  1147  // length as x.
  1148  func SortWeighted(x, weights []float64) {
  1149  	if weights == nil {
  1150  		sort.Float64s(x)
  1151  		return
  1152  	}
  1153  	if len(x) != len(weights) {
  1154  		panic("stat: slice length mismatch")
  1155  	}
  1156  	sort.Sort(weightSorter{
  1157  		x: x,
  1158  		w: weights,
  1159  	})
  1160  }
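
         // A usage sketch with hypothetical data; the weights follow their values:
         //  x := []float64{3, 1, 2}
         //  w := []float64{30, 10, 20}
         //  SortWeighted(x, w) // x == [1 2 3], w == [10 20 30]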
  1161  
  1162  type weightSorter struct {
  1163  	x []float64
  1164  	w []float64
  1165  }
  1166  
  1167  func (w weightSorter) Len() int           { return len(w.x) }
  1168  func (w weightSorter) Less(i, j int) bool { return w.x[i] < w.x[j] }
  1169  func (w weightSorter) Swap(i, j int) {
  1170  	w.x[i], w.x[j] = w.x[j], w.x[i]
  1171  	w.w[i], w.w[j] = w.w[j], w.w[i]
  1172  }
  1173  
  1174  // SortWeightedLabeled rearranges the data in x along with their
  1175  // corresponding weights and boolean labels so that the x data are sorted.
  1176  // The data is sorted in place. Weights and labels may be nil; if either
  1177  // is non-nil it must have the same length as x.
  1178  func SortWeightedLabeled(x []float64, labels []bool, weights []float64) {
  1179  	if labels == nil {
  1180  		SortWeighted(x, weights)
  1181  		return
  1182  	}
  1183  	if weights == nil {
  1184  		if len(x) != len(labels) {
  1185  			panic("stat: slice length mismatch")
  1186  		}
  1187  		sort.Sort(labelSorter{
  1188  			x: x,
  1189  			l: labels,
  1190  		})
  1191  		return
  1192  	}
  1193  	if len(x) != len(labels) || len(x) != len(weights) {
  1194  		panic("stat: slice length mismatch")
  1195  	}
  1196  	sort.Sort(weightLabelSorter{
  1197  		x: x,
  1198  		l: labels,
  1199  		w: weights,
  1200  	})
  1201  }
  1202  
  1203  type labelSorter struct {
  1204  	x []float64
  1205  	l []bool
  1206  }
  1207  
  1208  func (a labelSorter) Len() int           { return len(a.x) }
  1209  func (a labelSorter) Less(i, j int) bool { return a.x[i] < a.x[j] }
  1210  func (a labelSorter) Swap(i, j int) {
  1211  	a.x[i], a.x[j] = a.x[j], a.x[i]
  1212  	a.l[i], a.l[j] = a.l[j], a.l[i]
  1213  }
  1214  
  1215  type weightLabelSorter struct {
  1216  	x []float64
  1217  	l []bool
  1218  	w []float64
  1219  }
  1220  
  1221  func (a weightLabelSorter) Len() int           { return len(a.x) }
  1222  func (a weightLabelSorter) Less(i, j int) bool { return a.x[i] < a.x[j] }
  1223  func (a weightLabelSorter) Swap(i, j int) {
  1224  	a.x[i], a.x[j] = a.x[j], a.x[i]
  1225  	a.l[i], a.l[j] = a.l[j], a.l[i]
  1226  	a.w[i], a.w[j] = a.w[j], a.w[i]
  1227  }
  1228  
  1229  // StdDev returns the sample standard deviation.
  1230  func StdDev(x, weights []float64) float64 {
  1231  	_, std := MeanStdDev(x, weights)
  1232  	return std
  1233  }
  1234  
  1235  // MeanStdDev returns the sample mean and unbiased standard deviation.
  1236  // When weights sum to 1 or less, a biased variance estimator should be used.
  1237  func MeanStdDev(x, weights []float64) (mean, std float64) {
  1238  	mean, variance := MeanVariance(x, weights)
  1239  	return mean, math.Sqrt(variance)
  1240  }
  1241  
  1242  // StdErr returns the standard error in the mean with the given values.
  1243  func StdErr(std, sampleSize float64) float64 {
  1244  	return std / math.Sqrt(sampleSize)
  1245  }
  1246  
  1247  // StdScore returns the standard score (a.k.a. z-score, z-value) for the value x
  1248  // with the given mean and standard deviation, i.e.
  1249  //  (x - mean) / std
  1250  func StdScore(x, mean, std float64) float64 {
  1251  	return (x - mean) / std
  1252  }
  1253  
  1254  // Variance computes the unbiased weighted sample variance:
  1255  //  \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
  1256  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1257  // len(x) must equal len(weights).
  1258  // When weights sum to 1 or less, a biased variance estimator should be used.
  1259  func Variance(x, weights []float64) float64 {
  1260  	_, variance := MeanVariance(x, weights)
  1261  	return variance
  1262  }
  1263  
  1264  // MeanVariance computes the sample mean and unbiased variance, where the mean and variance are
  1265  //  \sum_i w_i * x_i / (sum_i w_i)
  1266  //  \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
  1267  // respectively.
  1268  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1269  // len(x) must equal len(weights).
  1270  // When weights sum to 1 or less, a biased variance estimator should be used.
  1271  func MeanVariance(x, weights []float64) (mean, variance float64) {
  1272  	var (
  1273  		unnormalisedVariance float64
  1274  		sumWeights           float64
  1275  	)
  1276  	mean, unnormalisedVariance, sumWeights = meanUnnormalisedVarianceSumWeights(x, weights)
  1277  	return mean, unnormalisedVariance / (sumWeights - 1)
  1278  }
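
         // A usage sketch with hypothetical data; the unbiased estimate divides
         // by n-1 == 2:
         //  x := []float64{2, 4, 6}
         //  mean, variance := MeanVariance(x, nil) // mean == 4, variance == (4+0+4)/2 == 4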
  1279  
  1280  // PopMeanVariance computes the sample mean and biased variance (also known as
  1281  // "population variance"), where the mean and variance are
  1282  //  \sum_i w_i * x_i / (sum_i w_i)
  1283  //  \sum_i w_i (x_i - mean)^2 / (sum_i w_i)
  1284  // respectively.
  1285  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1286  // len(x) must equal len(weights).
  1287  func PopMeanVariance(x, weights []float64) (mean, variance float64) {
  1288  	var (
  1289  		unnormalisedVariance float64
  1290  		sumWeights           float64
  1291  	)
  1292  	mean, unnormalisedVariance, sumWeights = meanUnnormalisedVarianceSumWeights(x, weights)
  1293  	return mean, unnormalisedVariance / sumWeights
  1294  }
  1295  
  1296  // PopMeanStdDev returns the sample mean and biased standard deviation
  1297  // (also known as "population standard deviation").
  1298  func PopMeanStdDev(x, weights []float64) (mean, std float64) {
  1299  	mean, variance := PopMeanVariance(x, weights)
  1300  	return mean, math.Sqrt(variance)
  1301  }
  1302  
  1303  // PopStdDev returns the population standard deviation, i.e., a square root
  1304  // of the biased variance estimate.
  1305  func PopStdDev(x, weights []float64) float64 {
  1306  	_, stDev := PopMeanStdDev(x, weights)
  1307  	return stDev
  1308  }
  1309  
  1310  // PopVariance computes the biased weighted sample variance ("population variance"):
  1311  //  \sum_i w_i (x_i - mean)^2 / (sum_i w_i)
  1312  // If weights is nil then all of the weights are 1. If weights is not nil, then
  1313  // len(x) must equal len(weights).
  1314  func PopVariance(x, weights []float64) float64 {
  1315  	_, variance := PopMeanVariance(x, weights)
  1316  	return variance
  1317  }
  1318  
  1319  func meanUnnormalisedVarianceSumWeights(x, weights []float64) (mean, unnormalisedVariance, sumWeights float64) {
  1320  	// This uses the corrected two-pass algorithm (1.7), from "Algorithms for computing
  1321  	// the sample variance: Analysis and recommendations" by Chan, Tony F., Gene H. Golub,
  1322  	// and Randall J. LeVeque.
  1323  
  1324  	// Note that this will panic if the slice lengths do not match.
  1325  	mean = Mean(x, weights)
  1326  	var (
  1327  		ss           float64
  1328  		compensation float64
  1329  	)
  1330  	if weights == nil {
  1331  		for _, v := range x {
  1332  			d := v - mean
  1333  			ss += d * d
  1334  			compensation += d
  1335  		}
  1336  		unnormalisedVariance = (ss - compensation*compensation/float64(len(x)))
  1337  		return mean, unnormalisedVariance, float64(len(x))
  1338  	}
  1339  
  1340  	for i, v := range x {
  1341  		w := weights[i]
  1342  		d := v - mean
  1343  		wd := w * d
  1344  		ss += wd * d
  1345  		compensation += wd
  1346  		sumWeights += w
  1347  	}
  1348  	unnormalisedVariance = (ss - compensation*compensation/sumWeights)
  1349  	return mean, unnormalisedVariance, sumWeights
  1350  }