github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/graphite/stats/statistics.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package stats
    22  
    23  import (
    24  	"math"
    25  )
    26  
// Values presents a set of data values as an array, for the purposes of
// aggregation.
type Values interface {
	// Len returns the number of values present.
	Len() int

	// ValueAt returns the value at the nth element. Within this file, calc
	// reads indices 0 through Len()-1 and skips any value that is NaN.
	ValueAt(n int) float64
}
    35  
// MutableValues is a set of data values that can be modified. It extends
// Values with the ability to overwrite individual elements.
type MutableValues interface {
	Values

	// SetValueAt sets the value at the nth element to v.
	SetValueAt(n int, v float64)
}
    43  
    44  // Float64Values is a simple Values implementation around a slice
    45  type Float64Values []float64
    46  
    47  // Len returns the number of elements in the array
    48  func (vals Float64Values) Len() int { return len(vals) }
    49  
    50  // ValueAt returns the value at the nth element
    51  func (vals Float64Values) ValueAt(n int) float64 { return vals[n] }
    52  
    53  // SetValueAt sets the value at the nth element
    54  func (vals Float64Values) SetValueAt(n int, v float64) { vals[n] = v }
    55  
// Statistics are the computation of standard statistics (min, max, mean,
// count, sum, stddev) over a group of values.
type Statistics struct {
	Min    float64 // smallest value in the group
	Max    float64 // largest value in the group
	Mean   float64 // arithmetic mean of the values
	Count  uint    // number of values included; calc does not count NaNs
	Sum    float64 // sum of the values
	StdDev float64 // standard deviation of the values
}
    66  
    67  // Merge merges a group of statistics
    68  func Merge(statistics []Statistics) Statistics {
    69  	var (
    70  		count               uint
    71  		min, max, mean, sum float64
    72  	)
    73  
    74  	for _, a := range statistics {
    75  		if a.Count == 0 {
    76  			continue
    77  		}
    78  
    79  		if count == 0 {
    80  			min, max = a.Min, a.Max
    81  		} else {
    82  			min, max = math.Min(min, a.Min), math.Max(max, a.Max)
    83  		}
    84  
    85  		priorCount := count
    86  		count += a.Count
    87  		sum += a.Sum
    88  		mean = ((a.Mean * float64(a.Count)) + (mean * float64(priorCount))) / float64(count)
    89  	}
    90  
    91  	if count == 0 {
    92  		return Statistics{}
    93  	}
    94  
    95  	var sum1, sum2 float64
    96  	for _, a := range statistics {
    97  		if a.Count == 0 {
    98  			continue
    99  		}
   100  
   101  		variance := a.StdDev * a.StdDev
   102  		avg := a.Mean
   103  		sum1 += float64(a.Count) * variance
   104  		sum2 += float64(a.Count) * math.Pow(avg-mean, 2)
   105  	}
   106  
   107  	variance := ((sum1 + sum2) / float64(count))
   108  	return Statistics{
   109  		Count:  count,
   110  		Min:    min,
   111  		Max:    max,
   112  		Mean:   mean,
   113  		Sum:    sum,
   114  		StdDev: math.Sqrt(variance),
   115  	}
   116  }
   117  
   118  func calc(values Values) (uint, float64, float64, float64, float64, float64) {
   119  	count := uint(0)
   120  	sum := float64(0)
   121  	min := math.MaxFloat64
   122  	max := -math.MaxFloat64
   123  	for i := 0; i < values.Len(); i++ {
   124  		n := values.ValueAt(i)
   125  		if math.IsNaN(n) {
   126  			continue
   127  		}
   128  		count++
   129  		sum += n
   130  		min = math.Min(n, min)
   131  		max = math.Max(n, max)
   132  	}
   133  
   134  	if count == 0 {
   135  		nan := math.NaN()
   136  		return 0, nan, nan, nan, nan, nan
   137  	}
   138  
   139  	mean := float64(0)
   140  	if count > 0 {
   141  		mean = sum / float64(count)
   142  	}
   143  
   144  	stddev := float64(0)
   145  	if count > 1 {
   146  		m2 := float64(0)
   147  		for i := 0; i < values.Len(); i++ {
   148  			n := values.ValueAt(i)
   149  			if math.IsNaN(n) {
   150  				continue
   151  			}
   152  
   153  			diff := n - mean
   154  			m2 += diff * diff
   155  		}
   156  
   157  		variance := m2 / float64(count-1)
   158  		stddev = math.Sqrt(variance)
   159  	}
   160  	return count, min, max, mean, sum, stddev
   161  }
   162  
   163  // Calc calculates statistics for a set of values
   164  func Calc(values Values) Statistics {
   165  	count, min, max, mean, sum, stddev := calc(values)
   166  	return Statistics{
   167  		Count:  count,
   168  		Min:    min,
   169  		Max:    max,
   170  		Mean:   mean,
   171  		Sum:    sum,
   172  		StdDev: stddev,
   173  	}
   174  }
   175  
   176  // SingleCountStatistics returns Statistics for a single value
   177  func SingleCountStatistics(value float64) Statistics {
   178  	return Statistics{
   179  		Count:  1,
   180  		Min:    value,
   181  		Max:    value,
   182  		Sum:    value,
   183  		Mean:   value,
   184  		StdDev: 0,
   185  	}
   186  }
   187  
   188  // ZeroCountStatistics returns statistics when no values are present
   189  // (or when all values are NaNs)
   190  func ZeroCountStatistics() Statistics {
   191  	nan := math.NaN()
   192  	return Statistics{
   193  		Count:  0,
   194  		Min:    nan,
   195  		Max:    nan,
   196  		Sum:    nan,
   197  		Mean:   nan,
   198  		StdDev: nan,
   199  	}
   200  }