github.com/whoyao/protocol@v0.0.0-20230519045905-2d8ace718ca5/utils/timeseries/timeseries.go (about)

     1  package timeseries
     2  
     3  import (
     4  	"container/list"
     5  	"errors"
     6  	"fmt"
     7  	"math"
     8  	"sync"
     9  	"time"
    10  )
    11  
    12  // ------------------------------------------------
    13  
    14  var (
    15  	errNotEnoughSamples = errors.New("not enough samples")
    16  )
    17  
    18  // ------------------------------------------------
    19  
    20  type TimeSeriesUpdateOp int
    21  
    22  const (
    23  	TimeSeriesUpdateOpAdd TimeSeriesUpdateOp = iota
    24  	TimeSeriesUpdateOpMax
    25  	TimeSeriesUpdateOpLatest
    26  )
    27  
    28  func (t TimeSeriesUpdateOp) String() string {
    29  	switch t {
    30  	case TimeSeriesUpdateOpAdd:
    31  		return "ADD"
    32  	case TimeSeriesUpdateOpMax:
    33  		return "MAX"
    34  	case TimeSeriesUpdateOpLatest:
    35  		return "LATEST"
    36  	default:
    37  		return fmt.Sprintf("%d", int(t))
    38  	}
    39  }
    40  
    41  // ------------------------------------------------
    42  
    43  type TimeSeriesCompareOp int
    44  
    45  const (
    46  	TimeSeriesCompareOpEQ TimeSeriesCompareOp = iota
    47  	TimeSeriesCompareOpNE
    48  	TimeSeriesCompareOpGT
    49  	TimeSeriesCompareOpGTE
    50  	TimeSeriesCompareOpLT
    51  	TimeSeriesCompareOpLTE
    52  )
    53  
    54  func (t TimeSeriesCompareOp) String() string {
    55  	switch t {
    56  	case TimeSeriesCompareOpEQ:
    57  		return "EQ"
    58  	case TimeSeriesCompareOpNE:
    59  		return "NE"
    60  	case TimeSeriesCompareOpGT:
    61  		return "GT"
    62  	case TimeSeriesCompareOpGTE:
    63  		return "GTE"
    64  	case TimeSeriesCompareOpLT:
    65  		return "LT"
    66  	case TimeSeriesCompareOpLTE:
    67  		return "LTE"
    68  	default:
    69  		return fmt.Sprintf("%d", int(t))
    70  	}
    71  }
    72  
    73  // ------------------------------------------------
    74  
    75  type number interface {
    76  	uint32 | uint64 | int | int32 | int64 | float32 | float64
    77  }
    78  
    79  type TimeSeriesSample[T number] struct {
    80  	Value T
    81  	At    time.Time
    82  }
    83  
    84  type TimeSeriesParams struct {
    85  	UpdateOp         TimeSeriesUpdateOp
    86  	Window           time.Duration
    87  	CollapseDuration time.Duration
    88  }
    89  
    90  type TimeSeries[T number] struct {
    91  	params TimeSeriesParams
    92  
    93  	lock           sync.RWMutex
    94  	samples        *list.List
    95  	activeSample   T
    96  	isActiveSample bool
    97  
    98  	welfordCount int
    99  	welfordM     float64
   100  	welfordS     float64
   101  	welfordStart time.Time
   102  	welfordLast  time.Time
   103  }
   104  
   105  func NewTimeSeries[T number](params TimeSeriesParams) *TimeSeries[T] {
   106  	t := &TimeSeries[T]{
   107  		params:  params,
   108  		samples: list.New(),
   109  	}
   110  
   111  	t.initSamples()
   112  	return t
   113  }
   114  
   115  func (t *TimeSeries[T]) UpdateSample(val T) {
   116  	t.lock.Lock()
   117  	defer t.lock.Unlock()
   118  
   119  	if !t.isActiveSample {
   120  		t.isActiveSample = true
   121  		t.activeSample = val
   122  		return
   123  	}
   124  
   125  	switch t.params.UpdateOp {
   126  	case TimeSeriesUpdateOpAdd:
   127  		t.activeSample += val
   128  	case TimeSeriesUpdateOpMax:
   129  		if val > t.activeSample {
   130  			t.activeSample = val
   131  		}
   132  	case TimeSeriesUpdateOpLatest:
   133  		t.activeSample = val
   134  	}
   135  }
   136  
   137  func (t *TimeSeries[T]) CommitActiveSample() {
   138  	t.CommitActiveSampleAt(time.Now())
   139  }
   140  
   141  func (t *TimeSeries[T]) CommitActiveSampleAt(at time.Time) {
   142  	t.lock.Lock()
   143  	defer t.lock.Unlock()
   144  
   145  	if !t.isActiveSample {
   146  		return
   147  	}
   148  
   149  	t.addSampleAt(t.activeSample, at)
   150  	t.isActiveSample = false
   151  }
   152  
   153  func (t *TimeSeries[T]) AddSample(val T) {
   154  	t.AddSampleAt(val, time.Now())
   155  }
   156  
   157  func (t *TimeSeries[T]) AddSampleAt(val T, at time.Time) {
   158  	t.lock.Lock()
   159  	defer t.lock.Unlock()
   160  
   161  	t.addSampleAt(val, at)
   162  }
   163  
   164  func (t *TimeSeries[T]) GetSamples() []TimeSeriesSample[T] {
   165  	t.lock.Lock()
   166  	defer t.lock.Unlock()
   167  
   168  	t.prune()
   169  
   170  	samples := make([]TimeSeriesSample[T], 0, t.samples.Len())
   171  	for e := t.samples.Front(); e != nil; e = e.Next() {
   172  		samples = append(samples, e.Value.(TimeSeriesSample[T]))
   173  	}
   174  	return samples
   175  }
   176  
   177  func (t *TimeSeries[T]) GetSamplesAfter(at time.Time) []TimeSeriesSample[T] {
   178  	t.lock.Lock()
   179  	defer t.lock.Unlock()
   180  
   181  	t.prune()
   182  
   183  	samples := make([]TimeSeriesSample[T], 0, t.samples.Len())
   184  	for e := t.samples.Front(); e != nil; e = e.Next() {
   185  		s := e.Value.(TimeSeriesSample[T])
   186  		if s.At.After(at) {
   187  			samples = append(samples, s)
   188  		}
   189  	}
   190  	return samples
   191  }
   192  
   193  func (t *TimeSeries[T]) ClearSamples() {
   194  	t.lock.Lock()
   195  	defer t.lock.Unlock()
   196  
   197  	t.initSamples()
   198  }
   199  
   200  func (t *TimeSeries[T]) Sum() float64 {
   201  	t.lock.Lock()
   202  	defer t.lock.Unlock()
   203  
   204  	t.prune()
   205  
   206  	sum := float64(0.0)
   207  	for e := t.samples.Front(); e != nil; e = e.Next() {
   208  		s := e.Value.(TimeSeriesSample[T])
   209  		sum += float64(s.Value)
   210  	}
   211  
   212  	return sum
   213  }
   214  
   215  func (t *TimeSeries[T]) Min() T {
   216  	t.lock.Lock()
   217  	defer t.lock.Unlock()
   218  
   219  	t.prune()
   220  
   221  	return t.minLocked(t.samples.Len())
   222  }
   223  
   224  func (t *TimeSeries[T]) minLocked(numSamples int) T {
   225  	min := T(0)
   226  	for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 {
   227  		s := e.Value.(TimeSeriesSample[T])
   228  		if min == T(0) || min > s.Value {
   229  			min = s.Value
   230  		}
   231  	}
   232  
   233  	return min
   234  }
   235  
   236  func (t *TimeSeries[T]) Max() T {
   237  	t.lock.Lock()
   238  	defer t.lock.Unlock()
   239  
   240  	t.prune()
   241  
   242  	return t.maxLocked(t.samples.Len())
   243  }
   244  
   245  func (t *TimeSeries[T]) maxLocked(numSamples int) T {
   246  	max := T(0)
   247  	for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 {
   248  		s := e.Value.(TimeSeriesSample[T])
   249  		if max < s.Value {
   250  			max = s.Value
   251  		}
   252  	}
   253  
   254  	return max
   255  }
   256  
   257  func (t *TimeSeries[T]) CurrentRun(threshold T, op TimeSeriesCompareOp) time.Duration {
   258  	t.lock.Lock()
   259  	defer t.lock.Unlock()
   260  
   261  	t.prune()
   262  
   263  	start := time.Time{}
   264  	end := time.Time{}
   265  
   266  	for e := t.samples.Back(); e != nil; e = e.Prev() {
   267  		cond := false
   268  		s := e.Value.(TimeSeriesSample[T])
   269  		switch op {
   270  		case TimeSeriesCompareOpEQ:
   271  			cond = s.Value == threshold
   272  		case TimeSeriesCompareOpNE:
   273  			cond = s.Value != threshold
   274  		case TimeSeriesCompareOpGT:
   275  			cond = s.Value > threshold
   276  		case TimeSeriesCompareOpGTE:
   277  			cond = s.Value >= threshold
   278  		case TimeSeriesCompareOpLT:
   279  			cond = s.Value < threshold
   280  		case TimeSeriesCompareOpLTE:
   281  			cond = s.Value <= threshold
   282  		}
   283  		if !cond {
   284  			break
   285  		}
   286  		if end.IsZero() {
   287  			end = s.At
   288  		}
   289  		start = s.At
   290  	}
   291  
   292  	if end.IsZero() || start.IsZero() {
   293  		return 0
   294  	}
   295  
   296  	return end.Sub(start)
   297  }
   298  
   299  func (t *TimeSeries[T]) OnlineAverage() float64 {
   300  	t.lock.RLock()
   301  	defer t.lock.RUnlock()
   302  
   303  	return t.welfordM
   304  }
   305  
   306  func (t *TimeSeries[T]) OnlineVariance() float64 {
   307  	t.lock.RLock()
   308  	defer t.lock.RUnlock()
   309  
   310  	return t.onlineVarianceLocked()
   311  }
   312  
   313  func (t *TimeSeries[T]) onlineVarianceLocked() float64 {
   314  	if t.welfordCount > 1 {
   315  		return t.welfordS / float64(t.welfordCount-1)
   316  	}
   317  
   318  	return 0.0
   319  }
   320  
   321  func (t *TimeSeries[T]) OnlineStdDev() float64 {
   322  	t.lock.RLock()
   323  	defer t.lock.RUnlock()
   324  
   325  	return t.onlineStdDevLocked()
   326  }
   327  
   328  func (t *TimeSeries[T]) onlineStdDevLocked() float64 {
   329  	return math.Sqrt(t.onlineVarianceLocked())
   330  }
   331  
   332  func (t *TimeSeries[T]) ZScore(val T) float64 {
   333  	t.lock.RLock()
   334  	defer t.lock.RUnlock()
   335  
   336  	onlineStdDev := t.onlineStdDevLocked()
   337  	if onlineStdDev != 0.0 {
   338  		return (float64(val) - t.welfordM) / t.onlineStdDevLocked()
   339  	}
   340  
   341  	return 0.0
   342  }
   343  
   344  func (t *TimeSeries[T]) Slope() float64 {
   345  	t.lock.Lock()
   346  	defer t.lock.Unlock()
   347  
   348  	t.prune()
   349  
   350  	numSamples := t.samples.Len()
   351  	slope, _, _, _ := t.linearFitLocked(numSamples)
   352  
   353  	// convert to angle to normalize between -90deg to +90deg
   354  	return math.Atan(slope) * 180 / math.Pi
   355  }
   356  
   357  func (t *TimeSeries[T]) linearFitLocked(numSamples int) (slope float64, intercept float64, startedAt time.Time, endedAt time.Time) {
   358  	// go back numSamples first
   359  	e := t.samples.Back()
   360  	for i := 1; i < numSamples && e != nil; i++ {
   361  		e = e.Prev()
   362  	}
   363  
   364  	if e == nil {
   365  		// not enough samples
   366  		return
   367  	}
   368  
   369  	sx := float64(0.0)
   370  	sxsq := float64(0.0)
   371  	sy := float64(0.0)
   372  	sysq := float64(0.0)
   373  	sxy := float64(0.0)
   374  
   375  	for ; e != nil; e = e.Next() {
   376  		s := e.Value.(TimeSeriesSample[T])
   377  		if startedAt.IsZero() {
   378  			startedAt = s.At
   379  		}
   380  		if endedAt.IsZero() || s.At.After(endedAt) {
   381  			endedAt = s.At
   382  		}
   383  
   384  		x := s.At.Sub(startedAt).Seconds()
   385  		y := float64(s.Value)
   386  
   387  		sx += x
   388  		sxsq += x * x
   389  
   390  		sy += y
   391  		sysq += y * y
   392  
   393  		sxy += x * y
   394  	}
   395  
   396  	N := float64(numSamples)
   397  	sxwsq := sx * sx
   398  	denom := N*sxsq - sxwsq
   399  	if denom != 0.0 {
   400  		slope = (N*sxy - sx*sy) / denom
   401  	}
   402  	intercept = (sy - slope*sx) / N
   403  	return
   404  }
   405  
   406  func (t *TimeSeries[T]) LinearExtrapolateTo(numSamplesToUse int, after time.Duration) (float64, error) {
   407  	t.lock.Lock()
   408  	defer t.lock.Unlock()
   409  
   410  	t.prune()
   411  
   412  	slope, intercept, startedAt, endedAt := t.linearFitLocked(numSamplesToUse)
   413  	if startedAt.IsZero() {
   414  		return 0, errNotEnoughSamples
   415  	}
   416  
   417  	x := endedAt.Add(after).Sub(startedAt).Seconds()
   418  	y := slope*x + intercept
   419  	return y, nil
   420  }
   421  
   422  func (t *TimeSeries[T]) KendallsTau(numSamplesToUse int) (float64, error) {
   423  	t.lock.Lock()
   424  	t.prune()
   425  
   426  	if t.samples.Len() < numSamplesToUse {
   427  		t.lock.Unlock()
   428  		return 0.0, errNotEnoughSamples
   429  	}
   430  
   431  	values := make([]T, numSamplesToUse)
   432  	idx := numSamplesToUse - 1
   433  	for e := t.samples.Back(); e != nil; e = e.Prev() {
   434  		if idx < 0 {
   435  			break
   436  		}
   437  
   438  		s := e.Value.(TimeSeriesSample[T])
   439  		values[idx] = s.Value
   440  		idx--
   441  	}
   442  	t.lock.Unlock()
   443  
   444  	concordantPairs := 0
   445  	discordantPairs := 0
   446  	for i := 0; i < len(values)-1; i++ {
   447  		for j := i + 1; j < len(values); j++ {
   448  			if values[i] < values[j] {
   449  				concordantPairs++
   450  			} else if values[i] > values[j] {
   451  				discordantPairs++
   452  			}
   453  		}
   454  	}
   455  
   456  	if (concordantPairs + discordantPairs) == 0 {
   457  		return 0.0, nil
   458  	}
   459  
   460  	return (float64(concordantPairs) - float64(discordantPairs)) / (float64(concordantPairs) + float64(discordantPairs)), nil
   461  }
   462  
   463  func (t *TimeSeries[T]) initSamples() {
   464  	t.samples = t.samples.Init()
   465  }
   466  
   467  func (t *TimeSeries[T]) addSampleAt(val T, at time.Time) {
   468  	// insert in time order
   469  	e := t.samples.Back()
   470  	if e != nil {
   471  		lastSample := e.Value.(TimeSeriesSample[T])
   472  		if val == lastSample.Value && at.Sub(lastSample.At) < t.params.CollapseDuration {
   473  			// repeated value within collapse duration
   474  			t.prune()
   475  			return
   476  		}
   477  	}
   478  	for e = t.samples.Back(); e != nil; e = e.Prev() {
   479  		s := e.Value.(TimeSeriesSample[T])
   480  		if at.After(s.At) {
   481  			break
   482  		}
   483  	}
   484  
   485  	sample := TimeSeriesSample[T]{
   486  		Value: val,
   487  		At:    at,
   488  	}
   489  	switch {
   490  	case e != nil: // in the middle
   491  		t.samples.InsertAfter(sample, e)
   492  
   493  	case t.samples.Front() != nil: // in the front
   494  		t.samples.PushFront(sample)
   495  
   496  	default: // at the end
   497  		t.samples.PushBack(sample)
   498  	}
   499  
   500  	t.updateWelfordStats(val, at)
   501  
   502  	t.prune()
   503  }
   504  
   505  func (t *TimeSeries[T]) updateWelfordStats(val T, at time.Time) {
   506  	t.welfordCount++
   507  	mLast := t.welfordM
   508  	t.welfordM += (float64(val) - t.welfordM) / float64(t.welfordCount)
   509  	t.welfordS += (float64(val) - mLast) * (float64(val) - t.welfordM)
   510  
   511  	if t.welfordStart.IsZero() {
   512  		t.welfordStart = at
   513  	}
   514  	t.welfordLast = at
   515  }
   516  
   517  func (t *TimeSeries[T]) prune() {
   518  	thresh := t.welfordLast.Add(-t.params.Window)
   519  	//thresh := time.Now().Add(-t.params.Window)
   520  
   521  	toRemove := make([]*list.Element, 0, t.samples.Len())
   522  	for e := t.samples.Front(); e != nil; e = e.Next() {
   523  		s := e.Value.(TimeSeriesSample[T])
   524  		if s.At.After(thresh) {
   525  			break
   526  		}
   527  
   528  		toRemove = append(toRemove, e)
   529  	}
   530  
   531  	for _, e := range toRemove {
   532  		t.samples.Remove(e)
   533  	}
   534  }
   535  
   536  // TODO - a bunch of stats
   537  // - sum
   538  // - moving average
   539  // - EWMA
   540  // - min
   541  // - max
   542  // - average
   543  // - median
   544  // - variance
   545  // - stddev
   546  // - trend
   547  // - run
   548  // - z-score