github.com/m3db/m3@v1.5.0/src/query/functions/temporal/linear_regression.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package temporal
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  	"time"
    27  
    28  	"github.com/m3db/m3/src/query/executor/transform"
    29  	"github.com/m3db/m3/src/query/ts"
    30  	xtime "github.com/m3db/m3/src/x/time"
    31  )
    32  
// Operation type names accepted by NewLinearRegressionOp.
const (
	// PredictLinearType predicts the value of time series t seconds from now,
	// based on the input series, using simple linear regression.
	// It takes two arguments: the range and the scalar t. The prediction is
	// slope*t plus the intercept of the fitted line.
	// PredictLinearType should only be used with gauges.
	PredictLinearType = "predict_linear"

	// DerivType calculates the per-second derivative of the time series,
	// using simple linear regression.
	// It takes a single argument, the range, and yields the slope of the
	// fitted line.
	// DerivType should only be used with gauges.
	DerivType = "deriv"
)
    44  
// linearRegressionProcessor carries the per-op configuration that is copied
// into every linearRegressionNode created by initialize.
type linearRegressionProcessor struct {
	// fn turns the fitted (slope, intercept) pair into the output value.
	fn      linearRegFn
	// isDeriv is set for DerivType ops; it is forwarded to linearRegression,
	// where it changes the time origin used for the fit.
	isDeriv bool
}
    49  
    50  func (l linearRegressionProcessor) initialize(
    51  	_ time.Duration,
    52  	opts transform.Options,
    53  ) processor {
    54  	return &linearRegressionNode{
    55  		timeSpec: opts.TimeSpec(),
    56  		fn:       l.fn,
    57  		isDeriv:  l.isDeriv,
    58  	}
    59  }
    60  
// linearRegFn computes an output value from a fitted regression line. It is
// called with the slope first and the intercept second.
type linearRegFn func(float64, float64) float64
    62  
    63  // NewLinearRegressionOp creates a new base temporal transform
    64  // for linear regression functions.
    65  func NewLinearRegressionOp(
    66  	args []interface{},
    67  	optype string,
    68  ) (transform.Params, error) {
    69  	var (
    70  		fn      linearRegFn
    71  		isDeriv bool
    72  	)
    73  
    74  	switch optype {
    75  	case PredictLinearType:
    76  		if len(args) != 2 {
    77  			return emptyOp, fmt.Errorf("invalid number of args for %s: %d",
    78  				PredictLinearType, len(args))
    79  		}
    80  
    81  		duration, ok := args[1].(float64)
    82  		if !ok {
    83  			return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s",
    84  				args[1], PredictLinearType)
    85  		}
    86  
    87  		fn = func(slope, intercept float64) float64 {
    88  			return slope*duration + intercept
    89  		}
    90  
    91  	case DerivType:
    92  		if len(args) != 1 {
    93  			return emptyOp, fmt.Errorf("invalid number of args for %s: %d",
    94  				DerivType, len(args))
    95  		}
    96  
    97  		fn = func(slope, _ float64) float64 {
    98  			return slope
    99  		}
   100  
   101  		isDeriv = true
   102  
   103  	default:
   104  		return nil, fmt.Errorf("unknown linear regression type: %s", optype)
   105  	}
   106  
   107  	duration, ok := args[0].(time.Duration)
   108  	if !ok {
   109  		return emptyOp, fmt.Errorf("unable to cast to scalar argument: %v for %s",
   110  			args[0], optype)
   111  	}
   112  
   113  	l := linearRegressionProcessor{
   114  		fn:      fn,
   115  		isDeriv: isDeriv,
   116  	}
   117  
   118  	return newBaseOp(duration, optype, l)
   119  }
   120  
// linearRegressionNode is the processor produced by
// linearRegressionProcessor.initialize. Its process method runs the
// regression over a window of datapoints and applies fn to the result.
type linearRegressionNode struct {
	// timeSpec is the time spec taken from the transform options at
	// initialization.
	timeSpec transform.TimeSpec
	// fn maps the fitted (slope, intercept) pair to the output value.
	fn       linearRegFn
	// isDeriv is passed through to linearRegression.
	isDeriv  bool
}
   126  
   127  func (l linearRegressionNode) process(
   128  	dps ts.Datapoints,
   129  	iterBounds iterationBounds,
   130  ) float64 {
   131  	if dps.Len() < 2 {
   132  		return math.NaN()
   133  	}
   134  
   135  	evaluationTime := iterBounds.end
   136  	slope, intercept := linearRegression(dps, evaluationTime, l.isDeriv)
   137  	return l.fn(slope, intercept)
   138  }
   139  
   140  func subSeconds(from xtime.UnixNano, sub xtime.UnixNano) float64 {
   141  	return float64(from-sub) / float64(time.Second)
   142  }
   143  
   144  // linearRegression performs a least-square linear regression analysis on the
   145  // provided datapoints. It returns the slope, and the intercept value at the
   146  // provided time.
   147  // Uses this algorithm: https://en.wikipedia.org/wiki/Simple_linear_regression.
   148  func linearRegression(
   149  	dps ts.Datapoints,
   150  	interceptTime xtime.UnixNano,
   151  	isDeriv bool,
   152  ) (float64, float64) {
   153  	var (
   154  		n                                   float64
   155  		sumTimeDiff, sumVals                float64
   156  		sumTimeDiffVals, sumTimeDiffSquared float64
   157  		valueCount                          int
   158  	)
   159  
   160  	for _, dp := range dps {
   161  		if math.IsNaN(dp.Value) {
   162  			continue
   163  		}
   164  
   165  		if valueCount == 0 && isDeriv {
   166  			// set interceptTime as timestamp of first non-NaN dp
   167  			interceptTime = dp.Timestamp
   168  		}
   169  
   170  		valueCount++
   171  		timeDiff := subSeconds(dp.Timestamp, interceptTime)
   172  		n += 1.0
   173  		sumVals += dp.Value
   174  		sumTimeDiff += timeDiff
   175  		sumTimeDiffVals += timeDiff * dp.Value
   176  		sumTimeDiffSquared += timeDiff * timeDiff
   177  	}
   178  
   179  	// need at least 2 non-NaN values to calculate slope and intercept
   180  	if valueCount == 1 {
   181  		return math.NaN(), math.NaN()
   182  	}
   183  
   184  	covXY := sumTimeDiffVals - sumTimeDiff*sumVals/n
   185  	varX := sumTimeDiffSquared - sumTimeDiff*sumTimeDiff/n
   186  
   187  	slope := covXY / varX
   188  	intercept := sumVals/n - slope*sumTimeDiff/n
   189  
   190  	return slope, intercept
   191  }