github.com/fiatjaf/generic-ristretto@v0.0.1/z/histogram.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package z
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"strings"
    23  
    24  	"github.com/dustin/go-humanize"
    25  )
    26  
    27  // Creates bounds for an histogram. The bounds are powers of two of the form
    28  // [2^min_exponent, ..., 2^max_exponent].
    29  func HistogramBounds(minExponent, maxExponent uint32) []float64 {
    30  	var bounds []float64
    31  	for i := minExponent; i <= maxExponent; i++ {
    32  		bounds = append(bounds, float64(int(1)<<i))
    33  	}
    34  	return bounds
    35  }
    36  
    37  func Fibonacci(num int) []float64 {
    38  	assert(num > 4)
    39  	bounds := make([]float64, num)
    40  	bounds[0] = 1
    41  	bounds[1] = 2
    42  	for i := 2; i < num; i++ {
    43  		bounds[i] = bounds[i-1] + bounds[i-2]
    44  	}
    45  	return bounds
    46  }
    47  
    48  // HistogramData stores the information needed to represent the sizes of the keys and values
    49  // as a histogram.
    50  type HistogramData struct {
    51  	Bounds         []float64
    52  	Count          int64
    53  	CountPerBucket []int64
    54  	Min            int64
    55  	Max            int64
    56  	Sum            int64
    57  }
    58  
    59  // NewHistogramData returns a new instance of HistogramData with properly initialized fields.
    60  func NewHistogramData(bounds []float64) *HistogramData {
    61  	return &HistogramData{
    62  		Bounds:         bounds,
    63  		CountPerBucket: make([]int64, len(bounds)+1),
    64  		Max:            0,
    65  		Min:            math.MaxInt64,
    66  	}
    67  }
    68  
    69  func (histogram *HistogramData) Copy() *HistogramData {
    70  	if histogram == nil {
    71  		return nil
    72  	}
    73  	return &HistogramData{
    74  		Bounds:         append([]float64{}, histogram.Bounds...),
    75  		CountPerBucket: append([]int64{}, histogram.CountPerBucket...),
    76  		Count:          histogram.Count,
    77  		Min:            histogram.Min,
    78  		Max:            histogram.Max,
    79  		Sum:            histogram.Sum,
    80  	}
    81  }
    82  
    83  // Update changes the Min and Max fields if value is less than or greater than the current values.
    84  func (histogram *HistogramData) Update(value int64) {
    85  	if histogram == nil {
    86  		return
    87  	}
    88  	if value > histogram.Max {
    89  		histogram.Max = value
    90  	}
    91  	if value < histogram.Min {
    92  		histogram.Min = value
    93  	}
    94  
    95  	histogram.Sum += value
    96  	histogram.Count++
    97  
    98  	for index := 0; index <= len(histogram.Bounds); index++ {
    99  		// Allocate value in the last buckets if we reached the end of the Bounds array.
   100  		if index == len(histogram.Bounds) {
   101  			histogram.CountPerBucket[index]++
   102  			break
   103  		}
   104  
   105  		if value < int64(histogram.Bounds[index]) {
   106  			histogram.CountPerBucket[index]++
   107  			break
   108  		}
   109  	}
   110  }
   111  
   112  // Mean returns the mean value for the histogram.
   113  func (histogram *HistogramData) Mean() float64 {
   114  	if histogram.Count == 0 {
   115  		return 0
   116  	}
   117  	return float64(histogram.Sum) / float64(histogram.Count)
   118  }
   119  
   120  // String converts the histogram data into human-readable string.
   121  func (histogram *HistogramData) String() string {
   122  	if histogram == nil {
   123  		return ""
   124  	}
   125  	var b strings.Builder
   126  
   127  	b.WriteString("\n -- Histogram: \n")
   128  	b.WriteString(fmt.Sprintf("Min value: %d \n", histogram.Min))
   129  	b.WriteString(fmt.Sprintf("Max value: %d \n", histogram.Max))
   130  	b.WriteString(fmt.Sprintf("Count: %d \n", histogram.Count))
   131  	b.WriteString(fmt.Sprintf("50p: %.2f \n", histogram.Percentile(0.5)))
   132  	b.WriteString(fmt.Sprintf("75p: %.2f \n", histogram.Percentile(0.75)))
   133  	b.WriteString(fmt.Sprintf("90p: %.2f \n", histogram.Percentile(0.90)))
   134  
   135  	numBounds := len(histogram.Bounds)
   136  	var cum float64
   137  	for index, count := range histogram.CountPerBucket {
   138  		if count == 0 {
   139  			continue
   140  		}
   141  
   142  		// The last bucket represents the bucket that contains the range from
   143  		// the last bound up to infinity so it's processed differently than the
   144  		// other buckets.
   145  		if index == len(histogram.CountPerBucket)-1 {
   146  			lowerBound := uint64(histogram.Bounds[numBounds-1])
   147  			page := float64(count*100) / float64(histogram.Count)
   148  			cum += page
   149  			b.WriteString(fmt.Sprintf("[%s, %s) %d %.2f%% %.2f%%\n",
   150  				humanize.IBytes(lowerBound), "infinity", count, page, cum))
   151  			continue
   152  		}
   153  
   154  		upperBound := uint64(histogram.Bounds[index])
   155  		lowerBound := uint64(0)
   156  		if index > 0 {
   157  			lowerBound = uint64(histogram.Bounds[index-1])
   158  		}
   159  
   160  		page := float64(count*100) / float64(histogram.Count)
   161  		cum += page
   162  		b.WriteString(fmt.Sprintf("[%d, %d) %d %.2f%% %.2f%%\n",
   163  			lowerBound, upperBound, count, page, cum))
   164  	}
   165  	b.WriteString(" --\n")
   166  	return b.String()
   167  }
   168  
   169  // Percentile returns the percentile value for the histogram.
   170  // value of p should be between [0.0-1.0]
   171  func (histogram *HistogramData) Percentile(p float64) float64 {
   172  	if histogram == nil {
   173  		return 0
   174  	}
   175  
   176  	if histogram.Count == 0 {
   177  		// if no data return the minimum range
   178  		return histogram.Bounds[0]
   179  	}
   180  	pval := int64(float64(histogram.Count) * p)
   181  	for i, v := range histogram.CountPerBucket {
   182  		pval = pval - v
   183  		if pval <= 0 {
   184  			if i == len(histogram.Bounds) {
   185  				break
   186  			}
   187  			return histogram.Bounds[i]
   188  		}
   189  	}
   190  	// default return should be the max range
   191  	return histogram.Bounds[len(histogram.Bounds)-1]
   192  }
   193  
   194  // Clear reset the histogram. Helpful in situations where we need to reset the metrics
   195  func (histogram *HistogramData) Clear() {
   196  	if histogram == nil {
   197  		return
   198  	}
   199  
   200  	histogram.Count = 0
   201  	histogram.CountPerBucket = make([]int64, len(histogram.Bounds)+1)
   202  	histogram.Sum = 0
   203  	histogram.Max = 0
   204  	histogram.Min = math.MaxInt64
   205  }