github.com/stackdocker/rkt@v0.10.1-0.20151109095037-1aa827478248/Godeps/_workspace/src/golang.org/x/net/trace/histogram.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  // This file implements histogramming for RPC statistics collection.
     8  
     9  import (
    10  	"bytes"
    11  	"fmt"
    12  	"html/template"
    13  	"log"
    14  	"math"
    15  
    16  	"github.com/coreos/rkt/Godeps/_workspace/src/golang.org/x/net/internal/timeseries"
    17  )
    18  
    19  const (
    20  	bucketCount = 38
    21  )
    22  
// histogram keeps counts of values in buckets that are spaced
// out in powers of 2: 0-1, 2-3, 4-7...
// histogram implements timeseries.Observable
//
// While every recorded measurement falls into the same bucket the histogram
// stays in a compact form that uses only value and valueCount. The buckets
// slice is allocated on first need, after which valueCount is set to -1.
type histogram struct {
	sum          int64   // running total of measurements
	sumOfSquares float64 // running total of the squared measurements
	buckets      []int64 // per-bucket counts; nil while in compact form
	value        int     // bucket index shared by all measurements while in compact form
	valueCount   int64   // number of measurements in bucket value; -1 once buckets is in use
}
    33  
    34  // AddMeasurement records a value measurement observation to the histogram.
    35  func (h *histogram) addMeasurement(value int64) {
    36  	// TODO: assert invariant
    37  	h.sum += value
    38  	h.sumOfSquares += float64(value) * float64(value)
    39  
    40  	bucketIndex := getBucket(value)
    41  
    42  	if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
    43  		h.value = bucketIndex
    44  		h.valueCount++
    45  	} else {
    46  		h.allocateBuckets()
    47  		h.buckets[bucketIndex]++
    48  	}
    49  }
    50  
    51  func (h *histogram) allocateBuckets() {
    52  	if h.buckets == nil {
    53  		h.buckets = make([]int64, bucketCount)
    54  		h.buckets[h.value] = h.valueCount
    55  		h.value = 0
    56  		h.valueCount = -1
    57  	}
    58  }
    59  
    60  func log2(i int64) int {
    61  	n := 0
    62  	for ; i >= 0x100; i >>= 8 {
    63  		n += 8
    64  	}
    65  	for ; i > 0; i >>= 1 {
    66  		n += 1
    67  	}
    68  	return n
    69  }
    70  
    71  func getBucket(i int64) (index int) {
    72  	index = log2(i) - 1
    73  	if index < 0 {
    74  		index = 0
    75  	}
    76  	if index >= bucketCount {
    77  		index = bucketCount - 1
    78  	}
    79  	return
    80  }
    81  
    82  // Total returns the number of recorded observations.
    83  func (h *histogram) total() (total int64) {
    84  	if h.valueCount >= 0 {
    85  		total = h.valueCount
    86  	}
    87  	for _, val := range h.buckets {
    88  		total += int64(val)
    89  	}
    90  	return
    91  }
    92  
    93  // Average returns the average value of recorded observations.
    94  func (h *histogram) average() float64 {
    95  	t := h.total()
    96  	if t == 0 {
    97  		return 0
    98  	}
    99  	return float64(h.sum) / float64(t)
   100  }
   101  
   102  // Variance returns the variance of recorded observations.
   103  func (h *histogram) variance() float64 {
   104  	t := float64(h.total())
   105  	if t == 0 {
   106  		return 0
   107  	}
   108  	s := float64(h.sum) / t
   109  	return h.sumOfSquares/t - s*s
   110  }
   111  
   112  // StandardDeviation returns the standard deviation of recorded observations.
   113  func (h *histogram) standardDeviation() float64 {
   114  	return math.Sqrt(h.variance())
   115  }
   116  
   117  // PercentileBoundary estimates the value that the given fraction of recorded
   118  // observations are less than.
   119  func (h *histogram) percentileBoundary(percentile float64) int64 {
   120  	total := h.total()
   121  
   122  	// Corner cases (make sure result is strictly less than Total())
   123  	if total == 0 {
   124  		return 0
   125  	} else if total == 1 {
   126  		return int64(h.average())
   127  	}
   128  
   129  	percentOfTotal := round(float64(total) * percentile)
   130  	var runningTotal int64
   131  
   132  	for i := range h.buckets {
   133  		value := h.buckets[i]
   134  		runningTotal += value
   135  		if runningTotal == percentOfTotal {
   136  			// We hit an exact bucket boundary. If the next bucket has data, it is a
   137  			// good estimate of the value. If the bucket is empty, we interpolate the
   138  			// midpoint between the next bucket's boundary and the next non-zero
   139  			// bucket. If the remaining buckets are all empty, then we use the
   140  			// boundary for the next bucket as the estimate.
   141  			j := uint8(i + 1)
   142  			min := bucketBoundary(j)
   143  			if runningTotal < total {
   144  				for h.buckets[j] == 0 {
   145  					j++
   146  				}
   147  			}
   148  			max := bucketBoundary(j)
   149  			return min + round(float64(max-min)/2)
   150  		} else if runningTotal > percentOfTotal {
   151  			// The value is in this bucket. Interpolate the value.
   152  			delta := runningTotal - percentOfTotal
   153  			percentBucket := float64(value-delta) / float64(value)
   154  			bucketMin := bucketBoundary(uint8(i))
   155  			nextBucketMin := bucketBoundary(uint8(i + 1))
   156  			bucketSize := nextBucketMin - bucketMin
   157  			return bucketMin + round(percentBucket*float64(bucketSize))
   158  		}
   159  	}
   160  	return bucketBoundary(bucketCount - 1)
   161  }
   162  
   163  // Median returns the estimated median of the observed values.
   164  func (h *histogram) median() int64 {
   165  	return h.percentileBoundary(0.5)
   166  }
   167  
   168  // Add adds other to h.
   169  func (h *histogram) Add(other timeseries.Observable) {
   170  	o := other.(*histogram)
   171  	if o.valueCount == 0 {
   172  		// Other histogram is empty
   173  	} else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
   174  		// Both have a single bucketed value, aggregate them
   175  		h.valueCount += o.valueCount
   176  	} else {
   177  		// Two different values necessitate buckets in this histogram
   178  		h.allocateBuckets()
   179  		if o.valueCount >= 0 {
   180  			h.buckets[o.value] += o.valueCount
   181  		} else {
   182  			for i := range h.buckets {
   183  				h.buckets[i] += o.buckets[i]
   184  			}
   185  		}
   186  	}
   187  	h.sumOfSquares += o.sumOfSquares
   188  	h.sum += o.sum
   189  }
   190  
   191  // Clear resets the histogram to an empty state, removing all observed values.
   192  func (h *histogram) Clear() {
   193  	h.buckets = nil
   194  	h.value = 0
   195  	h.valueCount = 0
   196  	h.sum = 0
   197  	h.sumOfSquares = 0
   198  }
   199  
   200  // CopyFrom copies from other, which must be a *histogram, into h.
   201  func (h *histogram) CopyFrom(other timeseries.Observable) {
   202  	o := other.(*histogram)
   203  	if o.valueCount == -1 {
   204  		h.allocateBuckets()
   205  		copy(h.buckets, o.buckets)
   206  	}
   207  	h.sum = o.sum
   208  	h.sumOfSquares = o.sumOfSquares
   209  	h.value = o.value
   210  	h.valueCount = o.valueCount
   211  }
   212  
   213  // Multiply scales the histogram by the specified ratio.
   214  func (h *histogram) Multiply(ratio float64) {
   215  	if h.valueCount == -1 {
   216  		for i := range h.buckets {
   217  			h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
   218  		}
   219  	} else {
   220  		h.valueCount = int64(float64(h.valueCount) * ratio)
   221  	}
   222  	h.sum = int64(float64(h.sum) * ratio)
   223  	h.sumOfSquares = h.sumOfSquares * ratio
   224  }
   225  
   226  // New creates a new histogram.
   227  func (h *histogram) New() timeseries.Observable {
   228  	r := new(histogram)
   229  	r.Clear()
   230  	return r
   231  }
   232  
   233  func (h *histogram) String() string {
   234  	return fmt.Sprintf("%d, %f, %d, %d, %v",
   235  		h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
   236  }
   237  
   238  // round returns the closest int64 to the argument
   239  func round(in float64) int64 {
   240  	return int64(math.Floor(in + 0.5))
   241  }
   242  
   243  // bucketBoundary returns the first value in the bucket.
   244  func bucketBoundary(bucket uint8) int64 {
   245  	if bucket == 0 {
   246  		return 0
   247  	}
   248  	return 1 << bucket
   249  }
   250  
   251  // bucketData holds data about a specific bucket for use in distTmpl.
   252  type bucketData struct {
   253  	Lower, Upper       int64
   254  	N                  int64
   255  	Pct, CumulativePct float64
   256  	GraphWidth         int
   257  }
   258  
   259  // data holds data about a Distribution for use in distTmpl.
   260  type data struct {
   261  	Buckets                 []*bucketData
   262  	Count, Median           int64
   263  	Mean, StandardDeviation float64
   264  }
   265  
// maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
// newData scales the bars so that the fullest bucket is drawn this many
// pixels wide.
const maxHTMLBarWidth = 350.0
   268  
   269  // newData returns data representing h for use in distTmpl.
   270  func (h *histogram) newData() *data {
   271  	// Force the allocation of buckets to simplify the rendering implementation
   272  	h.allocateBuckets()
   273  	// We scale the bars on the right so that the largest bar is
   274  	// maxHTMLBarWidth pixels in width.
   275  	maxBucket := int64(0)
   276  	for _, n := range h.buckets {
   277  		if n > maxBucket {
   278  			maxBucket = n
   279  		}
   280  	}
   281  	total := h.total()
   282  	barsizeMult := maxHTMLBarWidth / float64(maxBucket)
   283  	var pctMult float64
   284  	if total == 0 {
   285  		pctMult = 1.0
   286  	} else {
   287  		pctMult = 100.0 / float64(total)
   288  	}
   289  
   290  	buckets := make([]*bucketData, len(h.buckets))
   291  	runningTotal := int64(0)
   292  	for i, n := range h.buckets {
   293  		if n == 0 {
   294  			continue
   295  		}
   296  		runningTotal += n
   297  		var upperBound int64
   298  		if i < bucketCount-1 {
   299  			upperBound = bucketBoundary(uint8(i + 1))
   300  		} else {
   301  			upperBound = math.MaxInt64
   302  		}
   303  		buckets[i] = &bucketData{
   304  			Lower:         bucketBoundary(uint8(i)),
   305  			Upper:         upperBound,
   306  			N:             n,
   307  			Pct:           float64(n) * pctMult,
   308  			CumulativePct: float64(runningTotal) * pctMult,
   309  			GraphWidth:    int(float64(n) * barsizeMult),
   310  		}
   311  	}
   312  	return &data{
   313  		Buckets:           buckets,
   314  		Count:             total,
   315  		Median:            h.median(),
   316  		Mean:              h.average(),
   317  		StandardDeviation: h.standardDeviation(),
   318  	}
   319  }
   320  
   321  func (h *histogram) html() template.HTML {
   322  	buf := new(bytes.Buffer)
   323  	if err := distTmpl.Execute(buf, h.newData()); err != nil {
   324  		buf.Reset()
   325  		log.Printf("net/trace: couldn't execute template: %v", err)
   326  	}
   327  	return template.HTML(buf.String())
   328  }
   329  
   330  // Input: data
   331  var distTmpl = template.Must(template.New("distTmpl").Parse(`
   332  <table>
   333  <tr>
   334      <td style="padding:0.25em">Count: {{.Count}}</td>
   335      <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
   336      <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
   337      <td style="padding:0.25em">Median: {{.Median}}</td>
   338  </tr>
   339  </table>
   340  <hr>
   341  <table>
   342  {{range $b := .Buckets}}
   343  {{if $b}}
   344    <tr>
   345      <td style="padding:0 0 0 0.25em">[</td>
   346      <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
   347      <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
   348      <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
   349      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
   350      <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
   351      <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
   352    </tr>
   353  {{end}}
   354  {{end}}
   355  </table>
   356  `))