github.com/jgbaldwinbrown/perf@v0.1.1/pkg/stats/sample.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package stats 6 7 import ( 8 "math" 9 "sort" 10 ) 11 12 // Sample is a collection of possibly weighted data points. 13 type Sample struct { 14 // Xs is the slice of sample values. 15 Xs []float64 16 17 // Weights[i] is the weight of sample Xs[i]. If Weights is 18 // nil, all Xs have weight 1. Weights must have the same 19 // length of Xs and all values must be non-negative. 20 Weights []float64 21 22 // Sorted indicates that Xs is sorted in ascending order. 23 Sorted bool 24 } 25 26 // Bounds returns the minimum and maximum values of xs. 27 func Bounds(xs []float64) (min float64, max float64) { 28 if len(xs) == 0 { 29 return math.NaN(), math.NaN() 30 } 31 min, max = xs[0], xs[0] 32 for _, x := range xs { 33 if x < min { 34 min = x 35 } 36 if x > max { 37 max = x 38 } 39 } 40 return 41 } 42 43 // Bounds returns the minimum and maximum values of the Sample. 44 // 45 // If the Sample is weighted, this ignores samples with zero weight. 46 // 47 // This is constant time if s.Sorted and there are no zero-weighted 48 // values. 49 func (s Sample) Bounds() (min float64, max float64) { 50 if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) { 51 return Bounds(s.Xs) 52 } 53 54 if s.Sorted { 55 if s.Weights == nil { 56 return s.Xs[0], s.Xs[len(s.Xs)-1] 57 } 58 min, max = math.NaN(), math.NaN() 59 for i, w := range s.Weights { 60 if w != 0 { 61 min = s.Xs[i] 62 break 63 } 64 } 65 if math.IsNaN(min) { 66 return 67 } 68 for i := range s.Weights { 69 if s.Weights[len(s.Weights)-i-1] != 0 { 70 max = s.Xs[len(s.Weights)-i-1] 71 break 72 } 73 } 74 } else { 75 min, max = math.Inf(1), math.Inf(-1) 76 for i, x := range s.Xs { 77 w := s.Weights[i] 78 if x < min && w != 0 { 79 min = x 80 } 81 if x > max && w != 0 { 82 max = x 83 } 84 } 85 if math.IsInf(min, 0) { 86 min, max = math.NaN(), math.NaN() 87 } 88 } 89 return 90 } 91 92 // vecSum returns the sum of xs. 93 func vecSum(xs []float64) float64 { 94 sum := 0.0 95 for _, x := range xs { 96 sum += x 97 } 98 return sum 99 } 100 101 // Sum returns the (possibly weighted) sum of the Sample. 102 func (s Sample) Sum() float64 { 103 if s.Weights == nil { 104 return vecSum(s.Xs) 105 } 106 sum := 0.0 107 for i, x := range s.Xs { 108 sum += x * s.Weights[i] 109 } 110 return sum 111 } 112 113 // Weight returns the total weight of the Sasmple. 114 func (s Sample) Weight() float64 { 115 if s.Weights == nil { 116 return float64(len(s.Xs)) 117 } 118 return vecSum(s.Weights) 119 } 120 121 // Mean returns the arithmetic mean of xs. 122 func Mean(xs []float64) float64 { 123 if len(xs) == 0 { 124 return math.NaN() 125 } 126 m := 0.0 127 for i, x := range xs { 128 m += (x - m) / float64(i+1) 129 } 130 return m 131 } 132 133 // Mean returns the arithmetic mean of the Sample. 134 func (s Sample) Mean() float64 { 135 if len(s.Xs) == 0 || s.Weights == nil { 136 return Mean(s.Xs) 137 } 138 139 m, wsum := 0.0, 0.0 140 for i, x := range s.Xs { 141 // Use weighted incremental mean: 142 // m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i 143 // = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i) 144 w := s.Weights[i] 145 wsum += w 146 m += (x - m) * w / wsum 147 } 148 return m 149 } 150 151 // GeoMean returns the geometric mean of xs. xs must be positive. 152 func GeoMean(xs []float64) float64 { 153 if len(xs) == 0 { 154 return math.NaN() 155 } 156 m := 0.0 157 for i, x := range xs { 158 if x <= 0 { 159 return math.NaN() 160 } 161 lx := math.Log(x) 162 m += (lx - m) / float64(i+1) 163 } 164 return math.Exp(m) 165 } 166 167 // GeoMean returns the geometric mean of the Sample. All samples 168 // values must be positive. 169 func (s Sample) GeoMean() float64 { 170 if len(s.Xs) == 0 || s.Weights == nil { 171 return GeoMean(s.Xs) 172 } 173 174 m, wsum := 0.0, 0.0 175 for i, x := range s.Xs { 176 w := s.Weights[i] 177 wsum += w 178 lx := math.Log(x) 179 m += (lx - m) * w / wsum 180 } 181 return math.Exp(m) 182 } 183 184 // Variance returns the sample variance of xs. 185 func Variance(xs []float64) float64 { 186 if len(xs) == 0 { 187 return math.NaN() 188 } else if len(xs) <= 1 { 189 return 0 190 } 191 192 // Based on Wikipedia's presentation of Welford 1962 193 // (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm). 194 // This is more numerically stable than the standard two-pass 195 // formula and not prone to massive cancellation. 196 mean, M2 := 0.0, 0.0 197 for n, x := range xs { 198 delta := x - mean 199 mean += delta / float64(n+1) 200 M2 += delta * (x - mean) 201 } 202 return M2 / float64(len(xs)-1) 203 } 204 205 func (s Sample) Variance() float64 { 206 if len(s.Xs) == 0 || s.Weights == nil { 207 return Variance(s.Xs) 208 } 209 // TODO(austin) 210 panic("Weighted Variance not implemented") 211 } 212 213 // StdDev returns the sample standard deviation of xs. 214 func StdDev(xs []float64) float64 { 215 return math.Sqrt(Variance(xs)) 216 } 217 218 // StdDev returns the sample standard deviation of the Sample. 219 func (s Sample) StdDev() float64 { 220 if len(s.Xs) == 0 || s.Weights == nil { 221 return StdDev(s.Xs) 222 } 223 // TODO(austin) 224 panic("Weighted StdDev not implemented") 225 } 226 227 // Percentile returns the pctileth value from the Sample. This uses 228 // interpolation method R8 from Hyndman and Fan (1996). 229 // 230 // pctile will be capped to the range [0, 1]. If len(xs) == 0 or all 231 // weights are 0, returns NaN. 232 // 233 // Percentile(0.5) is the median. Percentile(0.25) and 234 // Percentile(0.75) are the first and third quartiles, respectively. 235 // 236 // This is constant time if s.Sorted and s.Weights == nil. 237 func (s Sample) Percentile(pctile float64) float64 { 238 if len(s.Xs) == 0 { 239 return math.NaN() 240 } else if pctile <= 0 { 241 min, _ := s.Bounds() 242 return min 243 } else if pctile >= 1 { 244 _, max := s.Bounds() 245 return max 246 } 247 248 if !s.Sorted { 249 // TODO(austin) Use select algorithm instead 250 s = *s.Copy().Sort() 251 } 252 253 if s.Weights == nil { 254 N := float64(len(s.Xs)) 255 //n := pctile * (N + 1) // R6 256 n := 1/3.0 + pctile*(N+1/3.0) // R8 257 kf, frac := math.Modf(n) 258 k := int(kf) 259 if k <= 0 { 260 return s.Xs[0] 261 } else if k >= len(s.Xs) { 262 return s.Xs[len(s.Xs)-1] 263 } 264 return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) 265 } else { 266 // TODO(austin): Implement interpolation 267 268 target := s.Weight() * pctile 269 270 // TODO(austin) If we had cumulative weights, we could 271 // do this in log time. 272 for i, weight := range s.Weights { 273 target -= weight 274 if target < 0 { 275 return s.Xs[i] 276 } 277 } 278 return s.Xs[len(s.Xs)-1] 279 } 280 } 281 282 // IQR returns the interquartile range of the Sample. 283 // 284 // This is constant time if s.Sorted and s.Weights == nil. 285 func (s Sample) IQR() float64 { 286 if !s.Sorted { 287 s = *s.Copy().Sort() 288 } 289 return s.Percentile(0.75) - s.Percentile(0.25) 290 } 291 292 type sampleSorter struct { 293 xs []float64 294 weights []float64 295 } 296 297 func (p *sampleSorter) Len() int { 298 return len(p.xs) 299 } 300 301 func (p *sampleSorter) Less(i, j int) bool { 302 return p.xs[i] < p.xs[j] 303 } 304 305 func (p *sampleSorter) Swap(i, j int) { 306 p.xs[i], p.xs[j] = p.xs[j], p.xs[i] 307 p.weights[i], p.weights[j] = p.weights[j], p.weights[i] 308 } 309 310 // Sort sorts the samples in place in s and returns s. 311 // 312 // A sorted sample improves the performance of some algorithms. 313 func (s *Sample) Sort() *Sample { 314 if s.Sorted || sort.Float64sAreSorted(s.Xs) { 315 // All set 316 } else if s.Weights == nil { 317 sort.Float64s(s.Xs) 318 } else { 319 sort.Sort(&sampleSorter{s.Xs, s.Weights}) 320 } 321 s.Sorted = true 322 return s 323 } 324 325 // Copy returns a copy of the Sample. 326 // 327 // The returned Sample shares no data with the original, so they can 328 // be modified (for example, sorted) independently. 329 func (s Sample) Copy() *Sample { 330 xs := make([]float64, len(s.Xs)) 331 copy(xs, s.Xs) 332 333 weights := []float64(nil) 334 if s.Weights != nil { 335 weights = make([]float64, len(s.Weights)) 336 copy(weights, s.Weights) 337 } 338 339 return &Sample{xs, weights, s.Sorted} 340 }