golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/trace/histogram.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  // This file implements histogramming for RPC statistics collection.
     8  
     9  import (
    10  	"bytes"
    11  	"fmt"
    12  	"html/template"
    13  	"log"
    14  	"math"
    15  	"sync"
    16  
    17  	"golang.org/x/net/internal/timeseries"
    18  )
    19  
    20  const (
    21  	bucketCount = 38
    22  )
    23  
    24  // histogram keeps counts of values in buckets that are spaced
    25  // out in powers of 2: 0-1, 2-3, 4-7...
    26  // histogram implements timeseries.Observable
    27  type histogram struct {
    28  	sum          int64   // running total of measurements
    29  	sumOfSquares float64 // square of running total
    30  	buckets      []int64 // bucketed values for histogram
    31  	value        int     // holds a single value as an optimization
    32  	valueCount   int64   // number of values recorded for single value
    33  }
    34  
    35  // addMeasurement records a value measurement observation to the histogram.
    36  func (h *histogram) addMeasurement(value int64) {
    37  	// TODO: assert invariant
    38  	h.sum += value
    39  	h.sumOfSquares += float64(value) * float64(value)
    40  
    41  	bucketIndex := getBucket(value)
    42  
    43  	if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
    44  		h.value = bucketIndex
    45  		h.valueCount++
    46  	} else {
    47  		h.allocateBuckets()
    48  		h.buckets[bucketIndex]++
    49  	}
    50  }
    51  
    52  func (h *histogram) allocateBuckets() {
    53  	if h.buckets == nil {
    54  		h.buckets = make([]int64, bucketCount)
    55  		h.buckets[h.value] = h.valueCount
    56  		h.value = 0
    57  		h.valueCount = -1
    58  	}
    59  }
    60  
    61  func log2(i int64) int {
    62  	n := 0
    63  	for ; i >= 0x100; i >>= 8 {
    64  		n += 8
    65  	}
    66  	for ; i > 0; i >>= 1 {
    67  		n += 1
    68  	}
    69  	return n
    70  }
    71  
    72  func getBucket(i int64) (index int) {
    73  	index = log2(i) - 1
    74  	if index < 0 {
    75  		index = 0
    76  	}
    77  	if index >= bucketCount {
    78  		index = bucketCount - 1
    79  	}
    80  	return
    81  }
    82  
    83  // Total returns the number of recorded observations.
    84  func (h *histogram) total() (total int64) {
    85  	if h.valueCount >= 0 {
    86  		total = h.valueCount
    87  	}
    88  	for _, val := range h.buckets {
    89  		total += int64(val)
    90  	}
    91  	return
    92  }
    93  
    94  // Average returns the average value of recorded observations.
    95  func (h *histogram) average() float64 {
    96  	t := h.total()
    97  	if t == 0 {
    98  		return 0
    99  	}
   100  	return float64(h.sum) / float64(t)
   101  }
   102  
   103  // Variance returns the variance of recorded observations.
   104  func (h *histogram) variance() float64 {
   105  	t := float64(h.total())
   106  	if t == 0 {
   107  		return 0
   108  	}
   109  	s := float64(h.sum) / t
   110  	return h.sumOfSquares/t - s*s
   111  }
   112  
   113  // StandardDeviation returns the standard deviation of recorded observations.
   114  func (h *histogram) standardDeviation() float64 {
   115  	return math.Sqrt(h.variance())
   116  }
   117  
   118  // PercentileBoundary estimates the value that the given fraction of recorded
   119  // observations are less than.
   120  func (h *histogram) percentileBoundary(percentile float64) int64 {
   121  	total := h.total()
   122  
   123  	// Corner cases (make sure result is strictly less than Total())
   124  	if total == 0 {
   125  		return 0
   126  	} else if total == 1 {
   127  		return int64(h.average())
   128  	}
   129  
   130  	percentOfTotal := round(float64(total) * percentile)
   131  	var runningTotal int64
   132  
   133  	for i := range h.buckets {
   134  		value := h.buckets[i]
   135  		runningTotal += value
   136  		if runningTotal == percentOfTotal {
   137  			// We hit an exact bucket boundary. If the next bucket has data, it is a
   138  			// good estimate of the value. If the bucket is empty, we interpolate the
   139  			// midpoint between the next bucket's boundary and the next non-zero
   140  			// bucket. If the remaining buckets are all empty, then we use the
   141  			// boundary for the next bucket as the estimate.
   142  			j := uint8(i + 1)
   143  			min := bucketBoundary(j)
   144  			if runningTotal < total {
   145  				for h.buckets[j] == 0 {
   146  					j++
   147  				}
   148  			}
   149  			max := bucketBoundary(j)
   150  			return min + round(float64(max-min)/2)
   151  		} else if runningTotal > percentOfTotal {
   152  			// The value is in this bucket. Interpolate the value.
   153  			delta := runningTotal - percentOfTotal
   154  			percentBucket := float64(value-delta) / float64(value)
   155  			bucketMin := bucketBoundary(uint8(i))
   156  			nextBucketMin := bucketBoundary(uint8(i + 1))
   157  			bucketSize := nextBucketMin - bucketMin
   158  			return bucketMin + round(percentBucket*float64(bucketSize))
   159  		}
   160  	}
   161  	return bucketBoundary(bucketCount - 1)
   162  }
   163  
   164  // Median returns the estimated median of the observed values.
   165  func (h *histogram) median() int64 {
   166  	return h.percentileBoundary(0.5)
   167  }
   168  
   169  // Add adds other to h.
   170  func (h *histogram) Add(other timeseries.Observable) {
   171  	o := other.(*histogram)
   172  	if o.valueCount == 0 {
   173  		// Other histogram is empty
   174  	} else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
   175  		// Both have a single bucketed value, aggregate them
   176  		h.valueCount += o.valueCount
   177  	} else {
   178  		// Two different values necessitate buckets in this histogram
   179  		h.allocateBuckets()
   180  		if o.valueCount >= 0 {
   181  			h.buckets[o.value] += o.valueCount
   182  		} else {
   183  			for i := range h.buckets {
   184  				h.buckets[i] += o.buckets[i]
   185  			}
   186  		}
   187  	}
   188  	h.sumOfSquares += o.sumOfSquares
   189  	h.sum += o.sum
   190  }
   191  
   192  // Clear resets the histogram to an empty state, removing all observed values.
   193  func (h *histogram) Clear() {
   194  	h.buckets = nil
   195  	h.value = 0
   196  	h.valueCount = 0
   197  	h.sum = 0
   198  	h.sumOfSquares = 0
   199  }
   200  
   201  // CopyFrom copies from other, which must be a *histogram, into h.
   202  func (h *histogram) CopyFrom(other timeseries.Observable) {
   203  	o := other.(*histogram)
   204  	if o.valueCount == -1 {
   205  		h.allocateBuckets()
   206  		copy(h.buckets, o.buckets)
   207  	}
   208  	h.sum = o.sum
   209  	h.sumOfSquares = o.sumOfSquares
   210  	h.value = o.value
   211  	h.valueCount = o.valueCount
   212  }
   213  
   214  // Multiply scales the histogram by the specified ratio.
   215  func (h *histogram) Multiply(ratio float64) {
   216  	if h.valueCount == -1 {
   217  		for i := range h.buckets {
   218  			h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
   219  		}
   220  	} else {
   221  		h.valueCount = int64(float64(h.valueCount) * ratio)
   222  	}
   223  	h.sum = int64(float64(h.sum) * ratio)
   224  	h.sumOfSquares = h.sumOfSquares * ratio
   225  }
   226  
   227  // New creates a new histogram.
   228  func (h *histogram) New() timeseries.Observable {
   229  	r := new(histogram)
   230  	r.Clear()
   231  	return r
   232  }
   233  
   234  func (h *histogram) String() string {
   235  	return fmt.Sprintf("%d, %f, %d, %d, %v",
   236  		h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
   237  }
   238  
   239  // round returns the closest int64 to the argument
   240  func round(in float64) int64 {
   241  	return int64(math.Floor(in + 0.5))
   242  }
   243  
   244  // bucketBoundary returns the first value in the bucket.
   245  func bucketBoundary(bucket uint8) int64 {
   246  	if bucket == 0 {
   247  		return 0
   248  	}
   249  	return 1 << bucket
   250  }
   251  
   252  // bucketData holds data about a specific bucket for use in distTmpl.
   253  type bucketData struct {
   254  	Lower, Upper       int64
   255  	N                  int64
   256  	Pct, CumulativePct float64
   257  	GraphWidth         int
   258  }
   259  
   260  // data holds data about a Distribution for use in distTmpl.
   261  type data struct {
   262  	Buckets                 []*bucketData
   263  	Count, Median           int64
   264  	Mean, StandardDeviation float64
   265  }
   266  
   267  // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
   268  const maxHTMLBarWidth = 350.0
   269  
   270  // newData returns data representing h for use in distTmpl.
   271  func (h *histogram) newData() *data {
   272  	// Force the allocation of buckets to simplify the rendering implementation
   273  	h.allocateBuckets()
   274  	// We scale the bars on the right so that the largest bar is
   275  	// maxHTMLBarWidth pixels in width.
   276  	maxBucket := int64(0)
   277  	for _, n := range h.buckets {
   278  		if n > maxBucket {
   279  			maxBucket = n
   280  		}
   281  	}
   282  	total := h.total()
   283  	barsizeMult := maxHTMLBarWidth / float64(maxBucket)
   284  	var pctMult float64
   285  	if total == 0 {
   286  		pctMult = 1.0
   287  	} else {
   288  		pctMult = 100.0 / float64(total)
   289  	}
   290  
   291  	buckets := make([]*bucketData, len(h.buckets))
   292  	runningTotal := int64(0)
   293  	for i, n := range h.buckets {
   294  		if n == 0 {
   295  			continue
   296  		}
   297  		runningTotal += n
   298  		var upperBound int64
   299  		if i < bucketCount-1 {
   300  			upperBound = bucketBoundary(uint8(i + 1))
   301  		} else {
   302  			upperBound = math.MaxInt64
   303  		}
   304  		buckets[i] = &bucketData{
   305  			Lower:         bucketBoundary(uint8(i)),
   306  			Upper:         upperBound,
   307  			N:             n,
   308  			Pct:           float64(n) * pctMult,
   309  			CumulativePct: float64(runningTotal) * pctMult,
   310  			GraphWidth:    int(float64(n) * barsizeMult),
   311  		}
   312  	}
   313  	return &data{
   314  		Buckets:           buckets,
   315  		Count:             total,
   316  		Median:            h.median(),
   317  		Mean:              h.average(),
   318  		StandardDeviation: h.standardDeviation(),
   319  	}
   320  }
   321  
   322  func (h *histogram) html() template.HTML {
   323  	buf := new(bytes.Buffer)
   324  	if err := distTmpl().Execute(buf, h.newData()); err != nil {
   325  		buf.Reset()
   326  		log.Printf("net/trace: couldn't execute template: %v", err)
   327  	}
   328  	return template.HTML(buf.String())
   329  }
   330  
   331  var distTmplCache *template.Template
   332  var distTmplOnce sync.Once
   333  
   334  func distTmpl() *template.Template {
   335  	distTmplOnce.Do(func() {
   336  		// Input: data
   337  		distTmplCache = template.Must(template.New("distTmpl").Parse(`
   338  <table>
   339  <tr>
   340      <td style="padding:0.25em">Count: {{.Count}}</td>
   341      <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
   342      <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
   343      <td style="padding:0.25em">Median: {{.Median}}</td>
   344  </tr>
   345  </table>
   346  <hr>
   347  <table>
   348  {{range $b := .Buckets}}
   349  {{if $b}}
   350    <tr>
   351      <td style="padding:0 0 0 0.25em">[</td>
   352      <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
   353      <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
   354      <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
   355      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
   356      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
   357      <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
   358    </tr>
   359  {{end}}
   360  {{end}}
   361  </table>
   362  `))
   363  	})
   364  	return distTmplCache
   365  }