github.com/google/cloudprober@v0.11.3/metrics/dist.go (about)

     1  // Copyright 2017 The Cloudprober Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metrics
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"math"
    21  	"reflect"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  
    27  	distpb "github.com/google/cloudprober/metrics/proto"
    28  	"google.golang.org/api/googleapi"
    29  	monitoring "google.golang.org/api/monitoring/v3"
    30  )
    31  
    32  // Distribution metrics type implements a histogram of values distributed over
    33  // a set of pre-defined buckets.
    34  type Distribution struct {
    35  	mu           sync.RWMutex
    36  	lowerBounds  []float64 // bucket lower bounds
    37  	bucketCounts []int64
    38  	count        int64   // count of all values
    39  	sum          float64 // sum of all samples.
    40  }
    41  
    42  // NewDistribution returns a new distribution container.
    43  func NewDistribution(lowerBounds []float64) *Distribution {
    44  	return &Distribution{
    45  		lowerBounds:  append([]float64{math.Inf(-1)}, lowerBounds...),
    46  		bucketCounts: make([]int64, len(lowerBounds)+1),
    47  	}
    48  }
    49  
    50  // NewExponentialDistribution returns a distribution container with
    51  // exponentially growing bucket sizes. Buckets' lower bounds are determined as
    52  // follows:
    53  // -Inf,
    54  // 0,
    55  // scale_factor,
    56  // scale_factor * base,
    57  // scale_factor * base^2,
    58  // ...
    59  // scale_factor * base^(i-1).., ith bucket
    60  // ...
    61  // scale_factor * base^(numBuckets), last element (numBuckets+1-th)
    62  func NewExponentialDistribution(base, scaleFactor float64, numBuckets int) (*Distribution, error) {
    63  	if base < 1.01 {
    64  		return nil, fmt.Errorf("exponential distribution's base (%f) should be at least 1.01", base)
    65  	}
    66  	lowerBounds := make([]float64, numBuckets+1)
    67  	lowerBounds[0] = 0
    68  	for i := 1; i < len(lowerBounds); i++ {
    69  		lowerBounds[i] = scaleFactor * math.Pow(base, float64(i-1))
    70  	}
    71  	return NewDistribution(lowerBounds), nil
    72  }
    73  
    74  // NewDistributionFromProto returns a new distribution based on the provided
    75  // protobuf.
    76  func NewDistributionFromProto(distProto *distpb.Dist) (*Distribution, error) {
    77  
    78  	switch distProto.Buckets.(type) {
    79  
    80  	case *distpb.Dist_ExplicitBuckets:
    81  		lbStringA := strings.Split(distProto.GetExplicitBuckets(), ",")
    82  		lowerBounds := make([]float64, len(lbStringA))
    83  		for i, tok := range lbStringA {
    84  			lb, err := strconv.ParseFloat(tok, 64)
    85  			if err != nil {
    86  				return nil, fmt.Errorf("invalid lower bound for bucket: %s. Err: %v", tok, err)
    87  			}
    88  			lowerBounds[i] = lb
    89  		}
    90  		return NewDistribution(lowerBounds), nil
    91  
    92  	case *distpb.Dist_ExponentialBuckets:
    93  		expb := distProto.GetExponentialBuckets()
    94  		return NewExponentialDistribution(float64(expb.GetBase()), float64(expb.GetScaleFactor()), int(expb.GetNumBuckets()))
    95  	}
    96  
    97  	return nil, fmt.Errorf("unknown buckets type: %v", distProto.Buckets)
    98  }
    99  
   100  func (d *Distribution) bucketIndex(sample float64) int {
   101  	return sort.Search(len(d.lowerBounds), func(i int) bool { return sample < d.lowerBounds[i] }) - 1
   102  }
   103  
   104  // AddSample adds a sample to the receiver distribution.
   105  func (d *Distribution) AddSample(sample float64) {
   106  	d.mu.Lock()
   107  	defer d.mu.Unlock()
   108  	d.bucketCounts[d.bucketIndex(sample)]++
   109  	d.sum += sample
   110  	d.count++
   111  }
   112  
   113  // AddInt64 adds an int64 to the receiver distribution.
   114  func (d *Distribution) AddInt64(i int64) {
   115  	d.AddSample(float64(i))
   116  }
   117  
   118  // AddFloat64 adds an float64 to the receiver distribution.
   119  func (d *Distribution) AddFloat64(f float64) {
   120  	d.AddSample(f)
   121  }
   122  
   123  // Add adds a distribution to the receiver distribution. If both distributions
   124  // don't have the same buckets, an error is returned.
   125  func (d *Distribution) Add(val Value) error {
   126  	_, err := d.addOrSubtract(val, false)
   127  	return err
   128  }
   129  
   130  // SubtractCounter subtracts the provided "lastVal", assuming that value
   131  // represents a counter, i.e. if "value" is less than "lastVal", we assume that
   132  // counter has been reset and don't subtract.
   133  func (d *Distribution) SubtractCounter(lastVal Value) (bool, error) {
   134  	return d.addOrSubtract(lastVal, true)
   135  }
   136  
   137  func (d *Distribution) addOrSubtract(val Value, subtract bool) (bool, error) {
   138  	delta, ok := val.(*Distribution)
   139  	if !ok {
   140  		return false, errors.New("dist: incompatible value to add or subtract")
   141  	}
   142  
   143  	if !reflect.DeepEqual(d.lowerBounds, delta.lowerBounds) {
   144  		return false, fmt.Errorf("incompatible delta value, Bucket lower bounds in receiver distribution: %v, and in delta distribution: %v", d.lowerBounds, delta.lowerBounds)
   145  	}
   146  	d.mu.Lock()
   147  	defer d.mu.Unlock()
   148  	delta.mu.RLock()
   149  	defer delta.mu.RUnlock()
   150  
   151  	if subtract {
   152  		// If receiver count is less than lastVal' count, assume reset and return.
   153  		if d.count < delta.count {
   154  			return true, nil
   155  		}
   156  		d.count -= delta.count
   157  		d.sum -= delta.sum
   158  	} else {
   159  		d.count += delta.count
   160  		d.sum += delta.sum
   161  	}
   162  
   163  	for i := 0; i < len(d.bucketCounts); i++ {
   164  		if subtract {
   165  			d.bucketCounts[i] -= delta.bucketCounts[i]
   166  		} else {
   167  			d.bucketCounts[i] += delta.bucketCounts[i]
   168  		}
   169  	}
   170  
   171  	return false, nil
   172  }
   173  
   174  // String returns a string representation of the distribution:
   175  // "dist:sum:<sum>|count:<count>|lb:<lower bounds>|bc:<bucket counts>"
   176  // For example for a distribution with lower bounds 0.5, 2.0, 7.5 and
   177  // bucket counts 34, 54, 121, 12, string representation will look like the
   178  // following:
   179  // dist:sum:899|count:221|lb:-Inf,0.5,2,7.5|bc:34,54,121,12
   180  func (d *Distribution) String() string {
   181  	d.mu.RLock()
   182  	defer d.mu.RUnlock()
   183  
   184  	var b strings.Builder
   185  
   186  	b.WriteString("dist:sum:")
   187  	b.WriteString(strconv.FormatFloat(d.sum, 'f', -1, 64))
   188  	b.WriteString("|count:")
   189  	b.WriteString(strconv.FormatInt(d.count, 10))
   190  
   191  	b.WriteString("|lb:")
   192  	for i, lb := range d.lowerBounds {
   193  		if i != 0 {
   194  			b.WriteByte(',')
   195  		}
   196  		b.WriteString(strconv.FormatFloat(lb, 'f', -1, 64))
   197  	}
   198  
   199  	b.WriteString("|bc:")
   200  	for i, c := range d.bucketCounts {
   201  		if i != 0 {
   202  			b.WriteByte(',')
   203  		}
   204  		b.WriteString(strconv.FormatInt(c, 10))
   205  	}
   206  
   207  	return b.String()
   208  }
   209  
   210  // Verify verifies that the distribution is valid.
   211  func (d *Distribution) Verify() error {
   212  	if len(d.lowerBounds) == 0 {
   213  		return errors.New("no distribution buckets found")
   214  	}
   215  	if len(d.lowerBounds) != len(d.bucketCounts) {
   216  		return fmt.Errorf("size mismatch between buckets array (%v) and bucket counts array (%v)", d.lowerBounds, d.bucketCounts)
   217  	}
   218  	var countSum int64
   219  	for _, c := range d.bucketCounts {
   220  		countSum += c
   221  	}
   222  	if d.count != countSum {
   223  		return fmt.Errorf("sum of bucket counts (%d) don't match with the overall count (%d)", countSum, d.count)
   224  	}
   225  	return nil
   226  }
   227  
   228  // ParseDistFromString parses a distribution value from a string that's in a
   229  // format that's generated by the String() method:
   230  // Example string: dist:sum:899|count:221|lb:-Inf,0.5,2,7.5|bc:34,54,121,12
   231  func ParseDistFromString(str string) (*Distribution, error) {
   232  	tokens := strings.SplitN(str, ":", 2)
   233  	if len(tokens) != 2 || tokens[0] != "dist" {
   234  		return nil, fmt.Errorf("invalid distribution string: %s", str)
   235  	}
   236  
   237  	d := &Distribution{}
   238  
   239  	var f float64
   240  	var i int64
   241  	var err error
   242  
   243  	errF := func(kv []string, err error) (*Distribution, error) {
   244  		return nil, fmt.Errorf("invalid token (%s:%s) in the distribution string: %s. Err: %v", kv[0], kv[1], str, err)
   245  	}
   246  
   247  	for _, tok := range strings.Split(tokens[1], "|") {
   248  		kv := strings.Split(tok, ":")
   249  		if len(kv) != 2 {
   250  			return nil, fmt.Errorf("invalid distribution string: %s", str)
   251  		}
   252  		switch kv[0] {
   253  		case "sum":
   254  			if f, err = strconv.ParseFloat(kv[1], 64); err != nil {
   255  				return errF(kv, err)
   256  			}
   257  			d.sum = f
   258  		case "count":
   259  			if i, err = strconv.ParseInt(kv[1], 10, 64); err != nil {
   260  				return errF(kv, err)
   261  			}
   262  			d.count = i
   263  		case "lb":
   264  			for _, vs := range strings.Split(kv[1], ",") {
   265  				if f, err = strconv.ParseFloat(vs, 64); err != nil {
   266  					return errF(kv, err)
   267  				}
   268  				d.lowerBounds = append(d.lowerBounds, f)
   269  			}
   270  		case "bc":
   271  			for _, vs := range strings.Split(kv[1], ",") {
   272  				if i, err = strconv.ParseInt(vs, 10, 64); err != nil {
   273  					return errF(kv, err)
   274  				}
   275  				d.bucketCounts = append(d.bucketCounts, i)
   276  			}
   277  		default:
   278  			return errF(kv, nil)
   279  		}
   280  	}
   281  	if err := d.Verify(); err != nil {
   282  		return nil, err
   283  	}
   284  	return d, nil
   285  }
   286  
   287  // DistributionData stuct, along with Data() function, provides a way to
   288  // readily share the Distribution data with other packages.
   289  type DistributionData struct {
   290  	LowerBounds  []float64 // bucket lower bounds
   291  	BucketCounts []int64
   292  	Count        int64   // count of all values
   293  	Sum          float64 // sum of all samples.
   294  }
   295  
   296  // Data returns a DistributionData object, built using Distribution's current
   297  // state.
   298  func (d *Distribution) Data() *DistributionData {
   299  	d.mu.RLock()
   300  	defer d.mu.RUnlock()
   301  	return &DistributionData{
   302  		LowerBounds:  d.lowerBounds,
   303  		BucketCounts: d.bucketCounts,
   304  		Count:        d.count,
   305  		Sum:          d.sum,
   306  	}
   307  }
   308  
   309  // StackdriverTypedValue returns a Stackdriver typed value corresponding to the
   310  // receiver distribution. This routine is used by stackdriver surfacer.
   311  func (d *Distribution) StackdriverTypedValue() *monitoring.TypedValue {
   312  	d.mu.RLock()
   313  	defer d.mu.RUnlock()
   314  	distVal := &monitoring.Distribution{
   315  		BucketCounts: googleapi.Int64s(append([]int64{}, d.bucketCounts...)),
   316  		BucketOptions: &monitoring.BucketOptions{
   317  			ExplicitBuckets: &monitoring.Explicit{
   318  				Bounds: append([]float64{}, d.lowerBounds[1:]...),
   319  			},
   320  		},
   321  		Count: d.count,
   322  	}
   323  	return &monitoring.TypedValue{
   324  		DistributionValue: distVal,
   325  	}
   326  }
   327  
   328  // Clone returns a copy of the receiver distribution.
   329  func (d *Distribution) Clone() Value {
   330  	d.mu.RLock()
   331  	defer d.mu.RUnlock()
   332  	newD := NewDistribution(d.lowerBounds[1:])
   333  	newD.sum = d.sum
   334  	newD.count = d.count
   335  	for i := range d.bucketCounts {
   336  		newD.bucketCounts[i] = d.bucketCounts[i]
   337  	}
   338  	return newD
   339  }