github.com/m3db/m3@v1.5.0/src/query/functions/temporal/linear_regression.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package temporal 22 23 import ( 24 "fmt" 25 "math" 26 "time" 27 28 "github.com/m3db/m3/src/query/executor/transform" 29 "github.com/m3db/m3/src/query/ts" 30 xtime "github.com/m3db/m3/src/x/time" 31 ) 32 33 const ( 34 // PredictLinearType predicts the value of time series t seconds from now, 35 // based on the input series, using simple linear regression. 36 // PredictLinearType should only be used with gauges. 37 PredictLinearType = "predict_linear" 38 39 // DerivType calculates the per-second derivative of the time series, 40 // using simple linear regression. 41 // DerivType should only be used with gauges. 42 DerivType = "deriv" 43 ) 44 45 type linearRegressionProcessor struct { 46 fn linearRegFn 47 isDeriv bool 48 } 49 50 func (l linearRegressionProcessor) initialize( 51 _ time.Duration, 52 opts transform.Options, 53 ) processor { 54 return &linearRegressionNode{ 55 timeSpec: opts.TimeSpec(), 56 fn: l.fn, 57 isDeriv: l.isDeriv, 58 } 59 } 60 61 type linearRegFn func(float64, float64) float64 62 63 // NewLinearRegressionOp creates a new base temporal transform 64 // for linear regression functions. 65 func NewLinearRegressionOp( 66 args []interface{}, 67 optype string, 68 ) (transform.Params, error) { 69 var ( 70 fn linearRegFn 71 isDeriv bool 72 ) 73 74 switch optype { 75 case PredictLinearType: 76 if len(args) != 2 { 77 return emptyOp, fmt.Errorf("invalid number of args for %s: %d", 78 PredictLinearType, len(args)) 79 } 80 81 duration, ok := args[1].(float64) 82 if !ok { 83 return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", 84 args[1], PredictLinearType) 85 } 86 87 fn = func(slope, intercept float64) float64 { 88 return slope*duration + intercept 89 } 90 91 case DerivType: 92 if len(args) != 1 { 93 return emptyOp, fmt.Errorf("invalid number of args for %s: %d", 94 DerivType, len(args)) 95 } 96 97 fn = func(slope, _ float64) float64 { 98 return slope 99 } 100 101 isDeriv = true 102 103 default: 104 return nil, fmt.Errorf("unknown linear regression type: %s", optype) 105 } 106 107 duration, ok := args[0].(time.Duration) 108 if !ok { 109 return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s", 110 args[0], optype) 111 } 112 113 l := linearRegressionProcessor{ 114 fn: fn, 115 isDeriv: isDeriv, 116 } 117 118 return newBaseOp(duration, optype, l) 119 } 120 121 type linearRegressionNode struct { 122 timeSpec transform.TimeSpec 123 fn linearRegFn 124 isDeriv bool 125 } 126 127 func (l linearRegressionNode) process( 128 dps ts.Datapoints, 129 iterBounds iterationBounds, 130 ) float64 { 131 if dps.Len() < 2 { 132 return math.NaN() 133 } 134 135 evaluationTime := iterBounds.end 136 slope, intercept := linearRegression(dps, evaluationTime, l.isDeriv) 137 return l.fn(slope, intercept) 138 } 139 140 func subSeconds(from xtime.UnixNano, sub xtime.UnixNano) float64 { 141 return float64(from-sub) / float64(time.Second) 142 } 143 144 // linearRegression performs a least-square linear regression analysis on the 145 // provided datapoints. It returns the slope, and the intercept value at the 146 // provided time. 147 // Uses this algorithm: https://en.wikipedia.org/wiki/Simple_linear_regression. 148 func linearRegression( 149 dps ts.Datapoints, 150 interceptTime xtime.UnixNano, 151 isDeriv bool, 152 ) (float64, float64) { 153 var ( 154 n float64 155 sumTimeDiff, sumVals float64 156 sumTimeDiffVals, sumTimeDiffSquared float64 157 valueCount int 158 ) 159 160 for _, dp := range dps { 161 if math.IsNaN(dp.Value) { 162 continue 163 } 164 165 if valueCount == 0 && isDeriv { 166 // set interceptTime as timestamp of first non-NaN dp 167 interceptTime = dp.Timestamp 168 } 169 170 valueCount++ 171 timeDiff := subSeconds(dp.Timestamp, interceptTime) 172 n += 1.0 173 sumVals += dp.Value 174 sumTimeDiff += timeDiff 175 sumTimeDiffVals += timeDiff * dp.Value 176 sumTimeDiffSquared += timeDiff * timeDiff 177 } 178 179 // need at least 2 non-NaN values to calculate slope and intercept 180 if valueCount == 1 { 181 return math.NaN(), math.NaN() 182 } 183 184 covXY := sumTimeDiffVals - sumTimeDiff*sumVals/n 185 varX := sumTimeDiffSquared - sumTimeDiff*sumTimeDiff/n 186 187 slope := covXY / varX 188 intercept := sumVals/n - slope*sumTimeDiff/n 189 190 return slope, intercept 191 }