vitess.io/vitess@v0.16.2/go/mathstats/sample.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mathstats
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  )
    11  
    12  // Sample is a collection of possibly weighted data points.
    13  type Sample struct {
    14  	// Xs is the slice of sample values.
    15  	Xs []float64
    16  
    17  	// Sorted indicates that Xs is sorted in ascending order.
    18  	Sorted bool
    19  }
    20  
    21  // Bounds returns the minimum and maximum values of xs.
    22  func Bounds(xs []float64) (min float64, max float64) {
    23  	if len(xs) == 0 {
    24  		return math.NaN(), math.NaN()
    25  	}
    26  	min, max = xs[0], xs[0]
    27  	for _, x := range xs {
    28  		if x < min {
    29  			min = x
    30  		}
    31  		if x > max {
    32  			max = x
    33  		}
    34  	}
    35  	return
    36  }
    37  
    38  // Bounds returns the minimum and maximum values of the Sample.
    39  //
    40  // If the Sample is weighted, this ignores samples with zero weight.
    41  //
    42  // This is constant time if s.Sorted and there are no zero-weighted
    43  // values.
    44  func (s Sample) Bounds() (min float64, max float64) {
    45  	if len(s.Xs) == 0 || !s.Sorted {
    46  		return Bounds(s.Xs)
    47  	}
    48  	return s.Xs[0], s.Xs[len(s.Xs)-1]
    49  }
    50  
    51  // vecSum returns the sum of xs.
    52  func vecSum(xs []float64) float64 {
    53  	sum := 0.0
    54  	for _, x := range xs {
    55  		sum += x
    56  	}
    57  	return sum
    58  }
    59  
    60  // Sum returns the (possibly weighted) sum of the Sample.
    61  func (s Sample) Sum() float64 {
    62  	return vecSum(s.Xs)
    63  }
    64  
    65  // Weight returns the total weight of the Sasmple.
    66  func (s Sample) Weight() float64 {
    67  	return float64(len(s.Xs))
    68  }
    69  
    70  // Mean returns the arithmetic mean of xs.
    71  func Mean(xs []float64) float64 {
    72  	if len(xs) == 0 {
    73  		return math.NaN()
    74  	}
    75  	m := 0.0
    76  	for i, x := range xs {
    77  		m += (x - m) / float64(i+1)
    78  	}
    79  	return m
    80  }
    81  
    82  // Mean returns the arithmetic mean of the Sample.
    83  func (s Sample) Mean() float64 {
    84  	return Mean(s.Xs)
    85  }
    86  
    87  // GeoMean returns the geometric mean of xs. xs must be positive.
    88  func GeoMean(xs []float64) float64 {
    89  	if len(xs) == 0 {
    90  		return math.NaN()
    91  	}
    92  	m := 0.0
    93  	for i, x := range xs {
    94  		if x <= 0 {
    95  			return math.NaN()
    96  		}
    97  		lx := math.Log(x)
    98  		m += (lx - m) / float64(i+1)
    99  	}
   100  	return math.Exp(m)
   101  }
   102  
   103  // GeoMean returns the geometric mean of the Sample. All samples
   104  // values must be positive.
   105  func (s Sample) GeoMean() float64 {
   106  	return GeoMean(s.Xs)
   107  }
   108  
   109  // Variance returns the sample variance of xs.
   110  func Variance(xs []float64) float64 {
   111  	if len(xs) == 0 {
   112  		return math.NaN()
   113  	} else if len(xs) <= 1 {
   114  		return 0
   115  	}
   116  
   117  	// Based on Wikipedia's presentation of Welford 1962
   118  	// (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm).
   119  	// This is more numerically stable than the standard two-pass
   120  	// formula and not prone to massive cancellation.
   121  	mean, M2 := 0.0, 0.0
   122  	for n, x := range xs {
   123  		delta := x - mean
   124  		mean += delta / float64(n+1)
   125  		M2 += delta * (x - mean)
   126  	}
   127  	return M2 / float64(len(xs)-1)
   128  }
   129  
   130  // Variance returns the variance of xs
   131  func (s Sample) Variance() float64 {
   132  	return Variance(s.Xs)
   133  }
   134  
   135  // StdDev returns the sample standard deviation of xs.
   136  func StdDev(xs []float64) float64 {
   137  	return math.Sqrt(Variance(xs))
   138  }
   139  
   140  // StdDev returns the sample standard deviation of the Sample.
   141  func (s Sample) StdDev() float64 {
   142  	return StdDev(s.Xs)
   143  }
   144  
   145  // Percentile returns the pctileth value from the Sample. This uses
   146  // interpolation method R8 from Hyndman and Fan (1996).
   147  //
   148  // pctile will be capped to the range [0, 1]. If len(xs) == 0 or all
   149  // weights are 0, returns NaN.
   150  //
   151  // Percentile(0.5) is the median. Percentile(0.25) and
   152  // Percentile(0.75) are the first and third quartiles, respectively.
   153  //
   154  // This is constant time if s.Sorted and s.Weights == nil.
   155  func (s *Sample) Percentile(pctile float64) float64 {
   156  	if len(s.Xs) == 0 {
   157  		return math.NaN()
   158  	} else if pctile <= 0 {
   159  		min, _ := s.Bounds()
   160  		return min
   161  	} else if pctile >= 1 {
   162  		_, max := s.Bounds()
   163  		return max
   164  	}
   165  
   166  	if !s.Sorted {
   167  		s.Sort()
   168  	}
   169  
   170  	N := float64(len(s.Xs))
   171  	//n := pctile * (N + 1) // R6
   172  	n := 1/3.0 + pctile*(N+1/3.0) // R8
   173  	kf, frac := math.Modf(n)
   174  	k := int(kf)
   175  	if k <= 0 {
   176  		return s.Xs[0]
   177  	} else if k >= len(s.Xs) {
   178  		return s.Xs[len(s.Xs)-1]
   179  	}
   180  	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
   181  }
   182  
   183  // IQR returns the interquartile range of the Sample.
   184  //
   185  // This is constant time if s.Sorted and s.Weights == nil.
   186  func (s Sample) IQR() float64 {
   187  	if !s.Sorted {
   188  		s = *s.Copy().Sort()
   189  	}
   190  	return s.Percentile(0.75) - s.Percentile(0.25)
   191  }
   192  
   193  // Sort sorts the samples in place in s and returns s.
   194  //
   195  // A sorted sample improves the performance of some algorithms.
   196  func (s *Sample) Sort() *Sample {
   197  	if s.Sorted || sort.Float64sAreSorted(s.Xs) {
   198  		// All set
   199  	} else {
   200  		sort.Float64s(s.Xs)
   201  	}
   202  	s.Sorted = true
   203  	return s
   204  }
   205  
   206  // Copy returns a copy of the Sample.
   207  //
   208  // The returned Sample shares no data with the original, so they can
   209  // be modified (for example, sorted) independently.
   210  func (s Sample) Copy() *Sample {
   211  	xs := make([]float64, len(s.Xs))
   212  	copy(xs, s.Xs)
   213  	return &Sample{xs, s.Sorted}
   214  }
   215  
   216  // FilterOutliers updates this sample in-place by removing all the values that are outliers
   217  func (s *Sample) FilterOutliers() {
   218  	// Discard outliers.
   219  	q1, q3 := s.Percentile(0.25), s.Percentile(0.75)
   220  	lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1)
   221  	nn := 0
   222  	for _, value := range s.Xs {
   223  		if lo <= value && value <= hi {
   224  			s.Xs[nn] = value
   225  			nn++
   226  		}
   227  	}
   228  	s.Xs = s.Xs[:nn]
   229  }
   230  
   231  // Clear resets this sample so it contains 0 values
   232  func (s *Sample) Clear() {
   233  	s.Xs = s.Xs[:0]
   234  	s.Sorted = false
   235  }