github.com/outcaste-io/ristretto@v0.2.3/z/histogram.go (about) 1 /* 2 * Copyright 2020 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package z 18 19 import ( 20 "fmt" 21 "math" 22 "strings" 23 24 "github.com/dustin/go-humanize" 25 ) 26 27 // Creates bounds for an histogram. The bounds are powers of two of the form 28 // [2^min_exponent, ..., 2^max_exponent]. 29 func HistogramBounds(minExponent, maxExponent uint32) []float64 { 30 var bounds []float64 31 for i := minExponent; i <= maxExponent; i++ { 32 bounds = append(bounds, float64(int(1)<<i)) 33 } 34 return bounds 35 } 36 37 func Fibonacci(num int) []float64 { 38 assert(num > 4) 39 bounds := make([]float64, num) 40 bounds[0] = 1 41 bounds[1] = 2 42 for i := 2; i < num; i++ { 43 bounds[i] = bounds[i-1] + bounds[i-2] 44 } 45 return bounds 46 } 47 48 // HistogramData stores the information needed to represent the sizes of the keys and values 49 // as a histogram. 50 type HistogramData struct { 51 Bounds []float64 52 Count int64 53 CountPerBucket []int64 54 Min int64 55 Max int64 56 Sum int64 57 } 58 59 // NewHistogramData returns a new instance of HistogramData with properly initialized fields. 60 func NewHistogramData(bounds []float64) *HistogramData { 61 return &HistogramData{ 62 Bounds: bounds, 63 CountPerBucket: make([]int64, len(bounds)+1), 64 Max: 0, 65 Min: math.MaxInt64, 66 } 67 } 68 69 func (histogram *HistogramData) Copy() *HistogramData { 70 if histogram == nil { 71 return nil 72 } 73 return &HistogramData{ 74 Bounds: append([]float64{}, histogram.Bounds...), 75 CountPerBucket: append([]int64{}, histogram.CountPerBucket...), 76 Count: histogram.Count, 77 Min: histogram.Min, 78 Max: histogram.Max, 79 Sum: histogram.Sum, 80 } 81 } 82 83 // Update changes the Min and Max fields if value is less than or greater than the current values. 84 func (histogram *HistogramData) Update(value int64) { 85 if histogram == nil { 86 return 87 } 88 if value > histogram.Max { 89 histogram.Max = value 90 } 91 if value < histogram.Min { 92 histogram.Min = value 93 } 94 95 histogram.Sum += value 96 histogram.Count++ 97 98 for index := 0; index <= len(histogram.Bounds); index++ { 99 // Allocate value in the last buckets if we reached the end of the Bounds array. 100 if index == len(histogram.Bounds) { 101 histogram.CountPerBucket[index]++ 102 break 103 } 104 105 if value < int64(histogram.Bounds[index]) { 106 histogram.CountPerBucket[index]++ 107 break 108 } 109 } 110 } 111 112 // Mean returns the mean value for the histogram. 113 func (histogram *HistogramData) Mean() float64 { 114 if histogram.Count == 0 { 115 return 0 116 } 117 return float64(histogram.Sum) / float64(histogram.Count) 118 } 119 120 // String converts the histogram data into human-readable string. 121 func (histogram *HistogramData) String() string { 122 if histogram == nil { 123 return "" 124 } 125 var b strings.Builder 126 127 b.WriteString("\n -- Histogram: \n") 128 b.WriteString(fmt.Sprintf("Min value: %d \n", histogram.Min)) 129 b.WriteString(fmt.Sprintf("Max value: %d \n", histogram.Max)) 130 b.WriteString(fmt.Sprintf("Count: %d \n", histogram.Count)) 131 b.WriteString(fmt.Sprintf("50p: %.2f \n", histogram.Percentile(0.5))) 132 b.WriteString(fmt.Sprintf("75p: %.2f \n", histogram.Percentile(0.75))) 133 b.WriteString(fmt.Sprintf("90p: %.2f \n", histogram.Percentile(0.90))) 134 135 numBounds := len(histogram.Bounds) 136 var cum float64 137 for index, count := range histogram.CountPerBucket { 138 if count == 0 { 139 continue 140 } 141 142 // The last bucket represents the bucket that contains the range from 143 // the last bound up to infinity so it's processed differently than the 144 // other buckets. 145 if index == len(histogram.CountPerBucket)-1 { 146 lowerBound := uint64(histogram.Bounds[numBounds-1]) 147 page := float64(count*100) / float64(histogram.Count) 148 cum += page 149 b.WriteString(fmt.Sprintf("[%s, %s) %d %.2f%% %.2f%%\n", 150 humanize.IBytes(lowerBound), "infinity", count, page, cum)) 151 continue 152 } 153 154 upperBound := uint64(histogram.Bounds[index]) 155 lowerBound := uint64(0) 156 if index > 0 { 157 lowerBound = uint64(histogram.Bounds[index-1]) 158 } 159 160 page := float64(count*100) / float64(histogram.Count) 161 cum += page 162 b.WriteString(fmt.Sprintf("[%d, %d) %d %.2f%% %.2f%%\n", 163 lowerBound, upperBound, count, page, cum)) 164 } 165 b.WriteString(" --\n") 166 return b.String() 167 } 168 169 // Percentile returns the percentile value for the histogram. 170 // value of p should be between [0.0-1.0] 171 func (histogram *HistogramData) Percentile(p float64) float64 { 172 if histogram == nil { 173 return 0 174 } 175 176 if histogram.Count == 0 { 177 // if no data return the minimum range 178 return histogram.Bounds[0] 179 } 180 pval := int64(float64(histogram.Count) * p) 181 for i, v := range histogram.CountPerBucket { 182 pval = pval - v 183 if pval <= 0 { 184 if i == len(histogram.Bounds) { 185 break 186 } 187 return histogram.Bounds[i] 188 } 189 } 190 // default return should be the max range 191 return histogram.Bounds[len(histogram.Bounds)-1] 192 } 193 194 // Clear reset the histogram. Helpful in situations where we need to reset the metrics 195 func (histogram *HistogramData) Clear() { 196 if histogram == nil { 197 return 198 } 199 200 histogram.Count = 0 201 histogram.CountPerBucket = make([]int64, len(histogram.Bounds)+1) 202 histogram.Sum = 0 203 histogram.Max = 0 204 histogram.Min = math.MaxInt64 205 }