github.com/m3db/m3@v1.5.0/src/query/functions/aggregation/function.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package aggregation 22 23 import ( 24 "math" 25 ) 26 27 const ( 28 // SumType adds all non nan elements in a list of series. 29 SumType = "sum" 30 // MinType takes the minimum all non nan elements in a list of series. 31 MinType = "min" 32 // MaxType takes the maximum all non nan elements in a list of series. 33 MaxType = "max" 34 // AverageType averages all non nan elements in a list of series. 35 AverageType = "avg" 36 // StandardDeviationType takes the population standard deviation of all non 37 // nan elements in a list of series. 38 StandardDeviationType = "stddev" 39 // StandardVarianceType takes the population standard variance of all non 40 // nan elements in a list of series. 41 StandardVarianceType = "var" 42 // CountType counts all non nan elements in a list of series. 43 CountType = "count" 44 ) 45 46 func absentFn(values []float64, bucket []int) float64 { 47 for _, idx := range bucket { 48 v := values[idx] 49 if !math.IsNaN(v) { 50 return math.NaN() 51 } 52 } 53 54 return 1 55 } 56 57 func sumAndCount(values []float64, bucket []int) (float64, float64) { 58 sum := 0.0 59 count := 0.0 60 for _, idx := range bucket { 61 v := values[idx] 62 if !math.IsNaN(v) { 63 sum += v 64 count++ 65 } 66 } 67 68 // If all elements are NaN, sum should be NaN. 69 if count == 0 { 70 sum = math.NaN() 71 } 72 73 return sum, count 74 } 75 76 func sumFn(values []float64, bucket []int) float64 { 77 sum, _ := sumAndCount(values, bucket) 78 return sum 79 } 80 81 func minFn(values []float64, bucket []int) float64 { 82 min := math.NaN() 83 for _, idx := range bucket { 84 v := values[idx] 85 if !math.IsNaN(v) { 86 if math.IsNaN(min) || min > v { 87 min = v 88 } 89 } 90 } 91 92 return min 93 } 94 95 func maxFn(values []float64, bucket []int) float64 { 96 max := math.NaN() 97 for _, idx := range bucket { 98 v := values[idx] 99 if !math.IsNaN(v) { 100 if math.IsNaN(max) || max < v { 101 max = v 102 } 103 } 104 } 105 106 return max 107 } 108 109 func averageFn(values []float64, bucket []int) float64 { 110 sum, count := sumAndCount(values, bucket) 111 112 // Cannot take average of no values 113 if count == 0 { 114 return math.NaN() 115 } 116 117 return sum / count 118 } 119 120 func stddevFn(values []float64, bucket []int) float64 { 121 return math.Sqrt(varianceFn(values, bucket)) 122 } 123 124 func varianceFn(values []float64, bucket []int) float64 { 125 if len(values) == 0 || len(bucket) == 0 { 126 return math.NaN() 127 } 128 129 // Using Welford's online algorithm for calculating variance 130 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm 131 // 132 // This algorithm is used in Prometheus and also should provide better numerical precision than 133 // the straight-forward implementation of the variance formula. The algorithm iterates through the values 134 // and at the each step recalculates mean and variance of the values seen so far. 135 136 var ( 137 count = 0 138 partialMean = 0.0 139 partialVarTimesCount = 0.0 // for better precision, calculate `variance * count` and divide at the end 140 ) 141 142 for _, idx := range bucket { 143 v := values[idx] 144 if !math.IsNaN(v) { 145 count++ 146 147 delta1 := v - partialMean 148 partialMean += delta1 / float64(count) 149 delta2 := v - partialMean 150 151 partialVarTimesCount += delta1 * delta2 152 } 153 } 154 155 if count < 1 { 156 return math.NaN() 157 } 158 159 return partialVarTimesCount / float64(count) 160 } 161 162 func countFn(values []float64, bucket []int) float64 { 163 _, count := sumAndCount(values, bucket) 164 return count 165 }