github.com/jgbaldwinbrown/perf@v0.1.1/benchmath/sample.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package benchmath provides tools for computing statistics over 6 // distributions of benchmark measurements. 7 // 8 // This package is opinionated. For example, it doesn't provide 9 // specific statistical tests. Instead, callers state distributional 10 // assumptions and this package chooses appropriate tests. 11 // 12 // All analysis results contain a list of warnings, captured as an 13 // []error value. These aren't errors that prevent analysis, but 14 // should be presented to the user along with analysis results. 15 package benchmath 16 17 import ( 18 "fmt" 19 "math" 20 "sort" 21 22 "github.com/aclements/go-moremath/mathx" 23 "github.com/aclements/go-moremath/stats" 24 ) 25 26 // A Sample is a set of repeated measurements of a given benchmark. 27 type Sample struct { 28 // Values are the measured values, in ascending order. 29 Values []float64 30 31 // Thresholds stores the statistical thresholds used by tests 32 // on this sample. 33 Thresholds *Thresholds 34 35 // Warnings is a list of warnings about this sample that 36 // should be reported to the user. 37 Warnings []error 38 } 39 40 // NewSample constructs a Sample from a set of measurements. 41 func NewSample(values []float64, t *Thresholds) *Sample { 42 // TODO: Analyze stationarity and put results in Warnings. 43 // Consider Augmented Dickey–Fuller (based on Maricq et al.) 44 45 // Sort values for fast order statistics. 46 sort.Float64s(values) 47 return &Sample{values, t, nil} 48 } 49 50 func (s *Sample) sample() stats.Sample { 51 return stats.Sample{Xs: s.Values, Sorted: true} 52 } 53 54 // A Thresholds configures various thresholds used by statistical tests. 55 // 56 // This should be initialized to DefaultThresholds because it may be 57 // extended with other fields in the future. 58 type Thresholds struct { 59 // CompareAlpha is the alpha level below which 60 // Assumption.Compare rejects the null hypothesis that two 61 // samples come from the same distribution. 62 // 63 // This is typically 0.05. 64 CompareAlpha float64 65 } 66 67 // Note: Thresholds exists so we can extend it in the future with 68 // things like the stationarity and normality test thresholds without 69 // having to add function arguments in the future. 70 71 // DefaultThresholds contains a reasonable set of defaults for Thresholds. 72 var DefaultThresholds = Thresholds{ 73 CompareAlpha: 0.05, 74 } 75 76 // An Assumption indicates a distributional assumption about a sample. 77 type Assumption interface { 78 // SummaryLabel returns the string name for the summary 79 // statistic under this assumption. For example, "median" or 80 // "mean". 81 SummaryLabel() string 82 83 // Summary returns a summary statistic and its confidence 84 // interval at the given confidence level for Sample s. 85 // 86 // Confidence is given in the range [0,1], e.g., 0.95 for 95% 87 // confidence. 88 Summary(s *Sample, confidence float64) Summary 89 90 // Compare tests whether s1 and s2 come from the same 91 // distribution. 92 Compare(s1, s2 *Sample) Comparison 93 } 94 95 // A Summary summarizes a Sample. 96 type Summary struct { 97 // Center is some measure of the central tendency of a sample. 98 Center float64 99 100 // Lo and Hi give the bounds of the confidence interval around 101 // Center. 102 Lo, Hi float64 103 104 // Confidence is the actual confidence level of the confidence 105 // interval given by Lo, Hi. It will be >= the requested 106 // confidence level. 107 Confidence float64 108 109 // Warnings is a list of warnings about this summary or its 110 // confidence interval. 111 Warnings []error 112 } 113 114 // PctRangeString returns a string representation of the range of this 115 // Summary's confidence interval as a percentage. 116 func (s Summary) PctRangeString() string { 117 if math.IsInf(s.Lo, 0) || math.IsInf(s.Hi, 0) { 118 return "∞" 119 } 120 121 // If the signs of the bounds differ from the center, we can't 122 // render it as a percent. 123 var csign = mathx.Sign(s.Center) 124 if csign != mathx.Sign(s.Lo) || csign != mathx.Sign(s.Hi) { 125 return "?" 126 } 127 128 // If center is 0, avoid dividing by zero. But we can only get 129 // here if lo and hi are also 0, in which case is seems 130 // reasonable to call this 0%. 131 if s.Center == 0 { 132 return "0%" 133 } 134 135 // Phew. Compute the range percent. 136 v := math.Max(s.Hi/s.Center-1, 1-s.Lo/s.Center) 137 return fmt.Sprintf("%.0f%%", 100*v) 138 } 139 140 // A Comparison is the result of comparing two samples to test if they 141 // come from the same distribution. 142 type Comparison struct { 143 // P is the p-value of the null hypothesis that two samples 144 // come from the same distribution. If P is less than a 145 // threshold alpha (typically 0.05), then we reject the null 146 // hypothesis. 147 // 148 // P can be 0, which indicates this is an exact result. 149 P float64 150 151 // N1 and N2 are the sizes of the two samples. 152 N1, N2 int 153 154 // Alpha is the alpha threshold for this test. If P < Alpha, 155 // we reject the null hypothesis that the two samples come 156 // from the same distribution. 157 Alpha float64 158 159 // Warnings is a list of warnings about this comparison 160 // result. 161 Warnings []error 162 } 163 164 // String summarizes the comparison. The general form of this string 165 // is "p=0.PPP n=N1+N2" but can be shortened. 166 func (c Comparison) String() string { 167 var s string 168 if c.P != 0 { 169 s = fmt.Sprintf("p=%0.3f ", c.P) 170 } 171 if c.N1 == c.N2 { 172 // Slightly shorter form for a common case. 173 return s + fmt.Sprintf("n=%d", c.N1) 174 } 175 return s + fmt.Sprintf("n=%d+%d", c.N1, c.N2) 176 } 177 178 // FormatDelta formats the difference in the centers of two distributions. 179 // The old and new values must be the center summaries of the two 180 // compared samples. If the Comparison accepts the null hypothesis 181 // that the samples come from the same distribution, FormatDelta 182 // returns "~" to indicate there's no meaningful difference. 183 // Otherwise, it returns the percent difference between the centers. 184 func (c Comparison) FormatDelta(old, new float64) string { 185 if c.P > c.Alpha { 186 return "~" 187 } 188 if old == new { 189 return "0.00%" 190 } 191 if old == 0 { 192 return "?" 193 } 194 pct := ((new / old) - 1.0) * 100.0 195 return fmt.Sprintf("%+.2f%%", pct) 196 }