github.com/livekit/protocol@v1.16.1-0.20240517185851-47e4c6bba773/utils/timeseries/timeseries.go (about)

     1  // Copyright 2023 LiveKit, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package timeseries
    16  
    17  import (
    18  	"container/list"
    19  	"errors"
    20  	"fmt"
    21  	"math"
    22  	"time"
    23  )
    24  
    25  // ------------------------------------------------
    26  
var (
	// errNotEnoughSamples is returned by computations that are asked to work
	// on more samples than the series currently holds.
	errNotEnoughSamples = errors.New("not enough samples")
)
    30  
    31  // ------------------------------------------------
    32  
    33  type TimeSeriesUpdateOp int
    34  
    35  const (
    36  	TimeSeriesUpdateOpAdd TimeSeriesUpdateOp = iota
    37  	TimeSeriesUpdateOpMax
    38  	TimeSeriesUpdateOpLatest
    39  )
    40  
    41  func (t TimeSeriesUpdateOp) String() string {
    42  	switch t {
    43  	case TimeSeriesUpdateOpAdd:
    44  		return "ADD"
    45  	case TimeSeriesUpdateOpMax:
    46  		return "MAX"
    47  	case TimeSeriesUpdateOpLatest:
    48  		return "LATEST"
    49  	default:
    50  		return fmt.Sprintf("%d", int(t))
    51  	}
    52  }
    53  
    54  // ------------------------------------------------
    55  
    56  type TimeSeriesCompareOp int
    57  
    58  const (
    59  	TimeSeriesCompareOpEQ TimeSeriesCompareOp = iota
    60  	TimeSeriesCompareOpNE
    61  	TimeSeriesCompareOpGT
    62  	TimeSeriesCompareOpGTE
    63  	TimeSeriesCompareOpLT
    64  	TimeSeriesCompareOpLTE
    65  )
    66  
    67  func (t TimeSeriesCompareOp) String() string {
    68  	switch t {
    69  	case TimeSeriesCompareOpEQ:
    70  		return "EQ"
    71  	case TimeSeriesCompareOpNE:
    72  		return "NE"
    73  	case TimeSeriesCompareOpGT:
    74  		return "GT"
    75  	case TimeSeriesCompareOpGTE:
    76  		return "GTE"
    77  	case TimeSeriesCompareOpLT:
    78  		return "LT"
    79  	case TimeSeriesCompareOpLTE:
    80  		return "LTE"
    81  	default:
    82  		return fmt.Sprintf("%d", int(t))
    83  	}
    84  }
    85  
    86  // ------------------------------------------------
    87  
// ReverseIterator walks the samples of a series from the newest towards the
// oldest. Call Next to advance and Value to read the sample Next fetched.
type ReverseIterator[T number] struct {
	// limit is the time bound: Next reports false for a sample whose
	// timestamp is not after it.
	limit time.Time
	// e is the list element that the next call to Next will read.
	e *list.Element
	// s is the sample most recently fetched by Next.
	s TimeSeriesSample[T]
}
    93  
    94  func (it *ReverseIterator[T]) Next() bool {
    95  	if it.e == nil {
    96  		return false
    97  	}
    98  
    99  	it.s = it.e.Value.(TimeSeriesSample[T])
   100  	it.e = it.e.Prev()
   101  	return it.s.At.After(it.limit)
   102  }
   103  
// Value returns the sample fetched by the most recent call to Next. Before
// the first call to Next it is the zero TimeSeriesSample.
func (it *ReverseIterator[T]) Value() TimeSeriesSample[T] {
	return it.s
}
   107  
   108  // ------------------------------------------------
   109  
// number is the type constraint listing the numeric types a series can store.
// The union has no ~ prefix, so only these exact types satisfy it.
type number interface {
	uint32 | uint64 | int | int32 | int64 | float32 | float64
}
   113  
// TimeSeriesSample is a single value together with the time it was recorded at.
type TimeSeriesSample[T number] struct {
	// Value is the recorded measurement.
	Value T
	// At is the timestamp the measurement is attributed to.
	At time.Time
}
   118  
// TimeSeriesParams configures a TimeSeries.
type TimeSeriesParams struct {
	// UpdateOp selects how UpdateSample combines a new value with the
	// pending active sample.
	UpdateOp TimeSeriesUpdateOp
	// Window is the retention span: pruning drops samples that are not newer
	// than (time of the last recorded sample - Window).
	Window time.Duration
	// CollapseDuration suppresses repeats: a new sample is dropped when its
	// value equals the latest stored value and it is less than this duration
	// after that sample.
	CollapseDuration time.Duration
}
   124  
// TimeSeries keeps a time-ordered, windowed list of samples plus running
// (Welford) statistics over every sample that has been recorded.
//
// NOTE(review): no lock is visible in this type despite the "Locked" helper
// names; presumably callers serialize access - confirm before concurrent use.
type TimeSeries[T number] struct {
	params TimeSeriesParams

	// samples holds TimeSeriesSample[T] values, oldest at the front.
	samples *list.List
	// activeSample is the pending value built up by UpdateSample.
	activeSample T
	// isActiveSample reports whether activeSample holds an uncommitted value.
	isActiveSample bool

	// welfordCount is the number of samples folded into the running stats.
	welfordCount int
	// welfordM is the running mean.
	welfordM float64
	// welfordS is the running sum of squared deviations from the mean.
	welfordS float64
	// welfordStart is the timestamp of the first sample folded into the stats.
	welfordStart time.Time
	// welfordLast is the timestamp of the most recently recorded sample; it
	// anchors the pruning window.
	welfordLast time.Time
}
   138  
   139  func NewTimeSeries[T number](params TimeSeriesParams) *TimeSeries[T] {
   140  	t := &TimeSeries[T]{
   141  		params:  params,
   142  		samples: list.New(),
   143  	}
   144  
   145  	t.initSamples()
   146  	return t
   147  }
   148  
   149  func (t *TimeSeries[T]) UpdateSample(val T) {
   150  	if !t.isActiveSample {
   151  		t.isActiveSample = true
   152  		t.activeSample = val
   153  		return
   154  	}
   155  
   156  	switch t.params.UpdateOp {
   157  	case TimeSeriesUpdateOpAdd:
   158  		t.activeSample += val
   159  	case TimeSeriesUpdateOpMax:
   160  		if val > t.activeSample {
   161  			t.activeSample = val
   162  		}
   163  	case TimeSeriesUpdateOpLatest:
   164  		t.activeSample = val
   165  	}
   166  }
   167  
// CommitActiveSample commits the pending active sample, if any, stamped with
// the current wall-clock time.
func (t *TimeSeries[T]) CommitActiveSample() {
	t.CommitActiveSampleAt(time.Now())
}
   171  
   172  func (t *TimeSeries[T]) CommitActiveSampleAt(at time.Time) {
   173  	if !t.isActiveSample {
   174  		return
   175  	}
   176  
   177  	t.addSampleAt(t.activeSample, at)
   178  	t.isActiveSample = false
   179  }
   180  
// AddSample records val stamped with the current wall-clock time.
func (t *TimeSeries[T]) AddSample(val T) {
	t.AddSampleAt(val, time.Now())
}
   184  
// AddSampleAt records val with timestamp at, bypassing the pending active
// sample.
func (t *TimeSeries[T]) AddSampleAt(val T, at time.Time) {
	t.addSampleAt(val, at)
}
   188  
   189  func (t *TimeSeries[T]) GetSamples() []TimeSeriesSample[T] {
   190  	t.prune()
   191  
   192  	samples := make([]TimeSeriesSample[T], 0, t.samples.Len())
   193  	for e := t.samples.Front(); e != nil; e = e.Next() {
   194  		samples = append(samples, e.Value.(TimeSeriesSample[T]))
   195  	}
   196  	return samples
   197  }
   198  
   199  func (t *TimeSeries[T]) GetSamplesAfter(at time.Time) []TimeSeriesSample[T] {
   200  	t.prune()
   201  
   202  	samples := make([]TimeSeriesSample[T], 0, t.samples.Len())
   203  	for e := t.samples.Front(); e != nil; e = e.Next() {
   204  		s := e.Value.(TimeSeriesSample[T])
   205  		if s.At.After(at) {
   206  			samples = append(samples, s)
   207  		}
   208  	}
   209  	return samples
   210  }
   211  
   212  func (t *TimeSeries[T]) ReverseIterateSamplesAfter(at time.Time) ReverseIterator[T] {
   213  	t.prune()
   214  
   215  	return ReverseIterator[T]{
   216  		limit: at,
   217  		e:     t.samples.Back(),
   218  	}
   219  }
   220  
// ClearSamples discards all stored samples. The pending active sample and
// the running (Welford) statistics are not touched here.
func (t *TimeSeries[T]) ClearSamples() {
	t.initSamples()
}
   224  
   225  func (t *TimeSeries[T]) Sum() float64 {
   226  	t.prune()
   227  
   228  	sum := float64(0.0)
   229  	for e := t.samples.Front(); e != nil; e = e.Next() {
   230  		s := e.Value.(TimeSeriesSample[T])
   231  		sum += float64(s.Value)
   232  	}
   233  
   234  	return sum
   235  }
   236  
   237  func (t *TimeSeries[T]) HasSamplesAfter(at time.Time) bool {
   238  	t.prune()
   239  
   240  	if e := t.samples.Back(); e != nil {
   241  		return e.Value.(TimeSeriesSample[T]).At.After(at)
   242  	}
   243  	return false
   244  }
   245  
   246  func (t *TimeSeries[T]) Back() TimeSeriesSample[T] {
   247  	t.prune()
   248  
   249  	if e := t.samples.Back(); e != nil {
   250  		return e.Value.(TimeSeriesSample[T])
   251  	}
   252  	return TimeSeriesSample[T]{}
   253  }
   254  
// Min prunes expired samples and returns the result of minLocked evaluated
// over every remaining sample.
func (t *TimeSeries[T]) Min() T {
	t.prune()

	return t.minLocked(t.samples.Len())
}
   260  
   261  func (t *TimeSeries[T]) minLocked(numSamples int) T {
   262  	min := T(0)
   263  	for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 {
   264  		s := e.Value.(TimeSeriesSample[T])
   265  		if min == T(0) || min > s.Value {
   266  			min = s.Value
   267  		}
   268  	}
   269  
   270  	return min
   271  }
   272  
// Max prunes expired samples and returns the result of maxLocked evaluated
// over every remaining sample.
func (t *TimeSeries[T]) Max() T {
	t.prune()

	return t.maxLocked(t.samples.Len())
}
   278  
   279  func (t *TimeSeries[T]) maxLocked(numSamples int) T {
   280  	max := T(0)
   281  	for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 {
   282  		s := e.Value.(TimeSeriesSample[T])
   283  		if max < s.Value {
   284  			max = s.Value
   285  		}
   286  	}
   287  
   288  	return max
   289  }
   290  
   291  func (t *TimeSeries[T]) CurrentRun(threshold T, op TimeSeriesCompareOp) time.Duration {
   292  	t.prune()
   293  
   294  	start := time.Time{}
   295  	end := time.Time{}
   296  
   297  	for e := t.samples.Back(); e != nil; e = e.Prev() {
   298  		cond := false
   299  		s := e.Value.(TimeSeriesSample[T])
   300  		switch op {
   301  		case TimeSeriesCompareOpEQ:
   302  			cond = s.Value == threshold
   303  		case TimeSeriesCompareOpNE:
   304  			cond = s.Value != threshold
   305  		case TimeSeriesCompareOpGT:
   306  			cond = s.Value > threshold
   307  		case TimeSeriesCompareOpGTE:
   308  			cond = s.Value >= threshold
   309  		case TimeSeriesCompareOpLT:
   310  			cond = s.Value < threshold
   311  		case TimeSeriesCompareOpLTE:
   312  			cond = s.Value <= threshold
   313  		}
   314  		if !cond {
   315  			break
   316  		}
   317  		if end.IsZero() {
   318  			end = s.At
   319  		}
   320  		start = s.At
   321  	}
   322  
   323  	if end.IsZero() || start.IsZero() {
   324  		return 0
   325  	}
   326  
   327  	return end.Sub(start)
   328  }
   329  
// OnlineAverage returns the running mean maintained incrementally as samples
// are recorded. It is not recomputed from the retained window, so samples
// that have since been pruned still contribute.
func (t *TimeSeries[T]) OnlineAverage() float64 {
	return t.welfordM
}
   333  
// OnlineVariance returns the running sample variance of the recorded
// samples, as computed by onlineVarianceLocked.
func (t *TimeSeries[T]) OnlineVariance() float64 {
	return t.onlineVarianceLocked()
}
   337  
   338  func (t *TimeSeries[T]) onlineVarianceLocked() float64 {
   339  	if t.welfordCount > 1 {
   340  		return t.welfordS / float64(t.welfordCount-1)
   341  	}
   342  
   343  	return 0.0
   344  }
   345  
// OnlineStdDev returns the running sample standard deviation of the recorded
// samples, as computed by onlineStdDevLocked.
func (t *TimeSeries[T]) OnlineStdDev() float64 {
	return t.onlineStdDevLocked()
}
   349  
// onlineStdDevLocked returns the square root of the running sample variance.
func (t *TimeSeries[T]) onlineStdDevLocked() float64 {
	return math.Sqrt(t.onlineVarianceLocked())
}
   353  
   354  func (t *TimeSeries[T]) ZScore(val T) float64 {
   355  	onlineStdDev := t.onlineStdDevLocked()
   356  	if onlineStdDev != 0.0 {
   357  		return (float64(val) - t.welfordM) / onlineStdDev
   358  	}
   359  
   360  	return 0.0
   361  }
   362  
   363  func (t *TimeSeries[T]) Slope() float64 {
   364  	t.prune()
   365  
   366  	numSamples := t.samples.Len()
   367  	slope, _, _, _ := t.linearFitLocked(numSamples)
   368  
   369  	// convert to angle to normalize between -90deg to +90deg
   370  	return math.Atan(slope) * 180 / math.Pi
   371  }
   372  
   373  func (t *TimeSeries[T]) linearFitLocked(numSamples int) (slope float64, intercept float64, startedAt time.Time, endedAt time.Time) {
   374  	// go back numSamples first
   375  	e := t.samples.Back()
   376  	for i := 1; i < numSamples && e != nil; i++ {
   377  		e = e.Prev()
   378  	}
   379  
   380  	if e == nil {
   381  		// not enough samples
   382  		return
   383  	}
   384  
   385  	sx := float64(0.0)
   386  	sxsq := float64(0.0)
   387  	sy := float64(0.0)
   388  	sysq := float64(0.0)
   389  	sxy := float64(0.0)
   390  
   391  	for ; e != nil; e = e.Next() {
   392  		s := e.Value.(TimeSeriesSample[T])
   393  		if startedAt.IsZero() {
   394  			startedAt = s.At
   395  		}
   396  		if endedAt.IsZero() || s.At.After(endedAt) {
   397  			endedAt = s.At
   398  		}
   399  
   400  		x := s.At.Sub(startedAt).Seconds()
   401  		y := float64(s.Value)
   402  
   403  		sx += x
   404  		sxsq += x * x
   405  
   406  		sy += y
   407  		sysq += y * y
   408  
   409  		sxy += x * y
   410  	}
   411  
   412  	N := float64(numSamples)
   413  	sxwsq := sx * sx
   414  	denom := N*sxsq - sxwsq
   415  	if denom != 0.0 {
   416  		slope = (N*sxy - sx*sy) / denom
   417  	}
   418  	intercept = (sy - slope*sx) / N
   419  	return
   420  }
   421  
   422  func (t *TimeSeries[T]) LinearExtrapolateTo(numSamplesToUse int, after time.Duration) (float64, error) {
   423  	t.prune()
   424  
   425  	slope, intercept, startedAt, endedAt := t.linearFitLocked(numSamplesToUse)
   426  	if startedAt.IsZero() {
   427  		return 0, errNotEnoughSamples
   428  	}
   429  
   430  	x := endedAt.Add(after).Sub(startedAt).Seconds()
   431  	y := slope*x + intercept
   432  	return y, nil
   433  }
   434  
   435  func (t *TimeSeries[T]) KendallsTau(numSamplesToUse int) (float64, error) {
   436  	t.prune()
   437  
   438  	if t.samples.Len() < numSamplesToUse {
   439  		return 0.0, errNotEnoughSamples
   440  	}
   441  
   442  	values := make([]T, numSamplesToUse)
   443  	idx := numSamplesToUse - 1
   444  	for e := t.samples.Back(); e != nil; e = e.Prev() {
   445  		if idx < 0 {
   446  			break
   447  		}
   448  
   449  		s := e.Value.(TimeSeriesSample[T])
   450  		values[idx] = s.Value
   451  		idx--
   452  	}
   453  
   454  	concordantPairs := 0
   455  	discordantPairs := 0
   456  	for i := 0; i < len(values)-1; i++ {
   457  		for j := i + 1; j < len(values); j++ {
   458  			if values[i] < values[j] {
   459  				concordantPairs++
   460  			} else if values[i] > values[j] {
   461  				discordantPairs++
   462  			}
   463  		}
   464  	}
   465  
   466  	if (concordantPairs + discordantPairs) == 0 {
   467  		return 0.0, nil
   468  	}
   469  
   470  	return (float64(concordantPairs) - float64(discordantPairs)) / (float64(concordantPairs) + float64(discordantPairs)), nil
   471  }
   472  
// initSamples empties the sample list in place. t.samples must already be
// non-nil, because Init is called on the existing list.
func (t *TimeSeries[T]) initSamples() {
	t.samples = t.samples.Init()
}
   476  
   477  func (t *TimeSeries[T]) addSampleAt(val T, at time.Time) {
   478  	// insert in time order
   479  	e := t.samples.Back()
   480  	if e != nil {
   481  		lastSample := e.Value.(TimeSeriesSample[T])
   482  		if val == lastSample.Value && at.Sub(lastSample.At) < t.params.CollapseDuration {
   483  			// repeated value within collapse duration
   484  			t.prune()
   485  			return
   486  		}
   487  	}
   488  	for e = t.samples.Back(); e != nil; e = e.Prev() {
   489  		s := e.Value.(TimeSeriesSample[T])
   490  		if at.After(s.At) {
   491  			break
   492  		}
   493  	}
   494  
   495  	sample := TimeSeriesSample[T]{
   496  		Value: val,
   497  		At:    at,
   498  	}
   499  	switch {
   500  	case e != nil: // in the middle
   501  		t.samples.InsertAfter(sample, e)
   502  
   503  	case t.samples.Front() != nil: // in the front
   504  		t.samples.PushFront(sample)
   505  
   506  	default: // at the end
   507  		t.samples.PushBack(sample)
   508  	}
   509  
   510  	t.updateWelfordStats(val, at)
   511  
   512  	t.prune()
   513  }
   514  
   515  func (t *TimeSeries[T]) updateWelfordStats(val T, at time.Time) {
   516  	t.welfordCount++
   517  	mLast := t.welfordM
   518  	t.welfordM += (float64(val) - t.welfordM) / float64(t.welfordCount)
   519  	t.welfordS += (float64(val) - mLast) * (float64(val) - t.welfordM)
   520  
   521  	if t.welfordStart.IsZero() {
   522  		t.welfordStart = at
   523  	}
   524  	t.welfordLast = at
   525  }
   526  
   527  func (t *TimeSeries[T]) prune() {
   528  	thresh := t.welfordLast.Add(-t.params.Window)
   529  	//thresh := time.Now().Add(-t.params.Window)
   530  
   531  	for next := t.samples.Front(); next != nil; {
   532  		e := next
   533  		s := e.Value.(TimeSeriesSample[T])
   534  		if s.At.After(thresh) {
   535  			break
   536  		}
   537  		next = e.Next()
   538  
   539  		t.samples.Remove(e)
   540  	}
   541  }
   542  
   543  // TODO - a bunch of stats
   544  // - sum
   545  // - moving average
   546  // - EWMA
   547  // - min
   548  // - max
   549  // - average
   550  // - median
   551  // - variance
   552  // - stddev
   553  // - trend
   554  // - run
   555  // - z-score