github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/functions/aggregation/function.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package aggregation
    22  
    23  import (
    24  	"math"
    25  )
    26  
// Aggregation type identifiers. Each constant is the string name of one
// aggregation.
const (
	// SumType adds all non-NaN elements in a list of series.
	SumType = "sum"
	// MinType takes the minimum of all non-NaN elements in a list of series.
	MinType = "min"
	// MaxType takes the maximum of all non-NaN elements in a list of series.
	MaxType = "max"
	// AverageType averages all non-NaN elements in a list of series.
	AverageType = "avg"
	// StandardDeviationType takes the population standard deviation of all
	// non-NaN elements in a list of series.
	StandardDeviationType = "stddev"
	// StandardVarianceType takes the population variance of all non-NaN
	// elements in a list of series.
	StandardVarianceType = "var"
	// CountType counts all non-NaN elements in a list of series.
	CountType = "count"
)
    45  
    46  func absentFn(values []float64, bucket []int) float64 {
    47  	for _, idx := range bucket {
    48  		v := values[idx]
    49  		if !math.IsNaN(v) {
    50  			return math.NaN()
    51  		}
    52  	}
    53  
    54  	return 1
    55  }
    56  
    57  func sumAndCount(values []float64, bucket []int) (float64, float64) {
    58  	sum := 0.0
    59  	count := 0.0
    60  	for _, idx := range bucket {
    61  		v := values[idx]
    62  		if !math.IsNaN(v) {
    63  			sum += v
    64  			count++
    65  		}
    66  	}
    67  
    68  	// If all elements are NaN, sum should be NaN.
    69  	if count == 0 {
    70  		sum = math.NaN()
    71  	}
    72  
    73  	return sum, count
    74  }
    75  
    76  func sumFn(values []float64, bucket []int) float64 {
    77  	sum, _ := sumAndCount(values, bucket)
    78  	return sum
    79  }
    80  
    81  func minFn(values []float64, bucket []int) float64 {
    82  	min := math.NaN()
    83  	for _, idx := range bucket {
    84  		v := values[idx]
    85  		if !math.IsNaN(v) {
    86  			if math.IsNaN(min) || min > v {
    87  				min = v
    88  			}
    89  		}
    90  	}
    91  
    92  	return min
    93  }
    94  
    95  func maxFn(values []float64, bucket []int) float64 {
    96  	max := math.NaN()
    97  	for _, idx := range bucket {
    98  		v := values[idx]
    99  		if !math.IsNaN(v) {
   100  			if math.IsNaN(max) || max < v {
   101  				max = v
   102  			}
   103  		}
   104  	}
   105  
   106  	return max
   107  }
   108  
   109  func averageFn(values []float64, bucket []int) float64 {
   110  	sum, count := sumAndCount(values, bucket)
   111  
   112  	// Cannot take average of no values
   113  	if count == 0 {
   114  		return math.NaN()
   115  	}
   116  
   117  	return sum / count
   118  }
   119  
   120  func stddevFn(values []float64, bucket []int) float64 {
   121  	return math.Sqrt(varianceFn(values, bucket))
   122  }
   123  
   124  func varianceFn(values []float64, bucket []int) float64 {
   125  	if len(values) == 0 || len(bucket) == 0 {
   126  		return math.NaN()
   127  	}
   128  
   129  	// Using Welford's online algorithm for calculating variance
   130  	// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
   131  	//
   132  	// This algorithm is used in Prometheus and also should provide better numerical precision than
   133  	// the straight-forward implementation of the variance formula. The algorithm iterates through the values
   134  	// and at the each step recalculates mean and variance of the values seen so far.
   135  
   136  	var (
   137  		count                = 0
   138  		partialMean          = 0.0
   139  		partialVarTimesCount = 0.0 // for better precision, calculate `variance * count` and divide at the end
   140  	)
   141  
   142  	for _, idx := range bucket {
   143  		v := values[idx]
   144  		if !math.IsNaN(v) {
   145  			count++
   146  
   147  			delta1 := v - partialMean
   148  			partialMean += delta1 / float64(count)
   149  			delta2 := v - partialMean
   150  
   151  			partialVarTimesCount += delta1 * delta2
   152  		}
   153  	}
   154  
   155  	if count < 1 {
   156  		return math.NaN()
   157  	}
   158  
   159  	return partialVarTimesCount / float64(count)
   160  }
   161  
   162  func countFn(values []float64, bucket []int) float64 {
   163  	_, count := sumAndCount(values, bucket)
   164  	return count
   165  }