github.com/whoyao/protocol@v0.0.0-20230519045905-2d8ace718ca5/utils/timeseries/timeseries.go (about) 1 package timeseries 2 3 import ( 4 "container/list" 5 "errors" 6 "fmt" 7 "math" 8 "sync" 9 "time" 10 ) 11 12 // ------------------------------------------------ 13 14 var ( 15 errNotEnoughSamples = errors.New("not enough samples") 16 ) 17 18 // ------------------------------------------------ 19 20 type TimeSeriesUpdateOp int 21 22 const ( 23 TimeSeriesUpdateOpAdd TimeSeriesUpdateOp = iota 24 TimeSeriesUpdateOpMax 25 TimeSeriesUpdateOpLatest 26 ) 27 28 func (t TimeSeriesUpdateOp) String() string { 29 switch t { 30 case TimeSeriesUpdateOpAdd: 31 return "ADD" 32 case TimeSeriesUpdateOpMax: 33 return "MAX" 34 case TimeSeriesUpdateOpLatest: 35 return "LATEST" 36 default: 37 return fmt.Sprintf("%d", int(t)) 38 } 39 } 40 41 // ------------------------------------------------ 42 43 type TimeSeriesCompareOp int 44 45 const ( 46 TimeSeriesCompareOpEQ TimeSeriesCompareOp = iota 47 TimeSeriesCompareOpNE 48 TimeSeriesCompareOpGT 49 TimeSeriesCompareOpGTE 50 TimeSeriesCompareOpLT 51 TimeSeriesCompareOpLTE 52 ) 53 54 func (t TimeSeriesCompareOp) String() string { 55 switch t { 56 case TimeSeriesCompareOpEQ: 57 return "EQ" 58 case TimeSeriesCompareOpNE: 59 return "NE" 60 case TimeSeriesCompareOpGT: 61 return "GT" 62 case TimeSeriesCompareOpGTE: 63 return "GTE" 64 case TimeSeriesCompareOpLT: 65 return "LT" 66 case TimeSeriesCompareOpLTE: 67 return "LTE" 68 default: 69 return fmt.Sprintf("%d", int(t)) 70 } 71 } 72 73 // ------------------------------------------------ 74 75 type number interface { 76 uint32 | uint64 | int | int32 | int64 | float32 | float64 77 } 78 79 type TimeSeriesSample[T number] struct { 80 Value T 81 At time.Time 82 } 83 84 type TimeSeriesParams struct { 85 UpdateOp TimeSeriesUpdateOp 86 Window time.Duration 87 CollapseDuration time.Duration 88 } 89 90 type TimeSeries[T number] struct { 91 params TimeSeriesParams 92 93 lock sync.RWMutex 94 samples *list.List 95 activeSample T 96 isActiveSample bool 97 98 welfordCount int 99 welfordM float64 100 welfordS float64 101 welfordStart time.Time 102 welfordLast time.Time 103 } 104 105 func NewTimeSeries[T number](params TimeSeriesParams) *TimeSeries[T] { 106 t := &TimeSeries[T]{ 107 params: params, 108 samples: list.New(), 109 } 110 111 t.initSamples() 112 return t 113 } 114 115 func (t *TimeSeries[T]) UpdateSample(val T) { 116 t.lock.Lock() 117 defer t.lock.Unlock() 118 119 if !t.isActiveSample { 120 t.isActiveSample = true 121 t.activeSample = val 122 return 123 } 124 125 switch t.params.UpdateOp { 126 case TimeSeriesUpdateOpAdd: 127 t.activeSample += val 128 case TimeSeriesUpdateOpMax: 129 if val > t.activeSample { 130 t.activeSample = val 131 } 132 case TimeSeriesUpdateOpLatest: 133 t.activeSample = val 134 } 135 } 136 137 func (t *TimeSeries[T]) CommitActiveSample() { 138 t.CommitActiveSampleAt(time.Now()) 139 } 140 141 func (t *TimeSeries[T]) CommitActiveSampleAt(at time.Time) { 142 t.lock.Lock() 143 defer t.lock.Unlock() 144 145 if !t.isActiveSample { 146 return 147 } 148 149 t.addSampleAt(t.activeSample, at) 150 t.isActiveSample = false 151 } 152 153 func (t *TimeSeries[T]) AddSample(val T) { 154 t.AddSampleAt(val, time.Now()) 155 } 156 157 func (t *TimeSeries[T]) AddSampleAt(val T, at time.Time) { 158 t.lock.Lock() 159 defer t.lock.Unlock() 160 161 t.addSampleAt(val, at) 162 } 163 164 func (t *TimeSeries[T]) GetSamples() []TimeSeriesSample[T] { 165 t.lock.Lock() 166 defer t.lock.Unlock() 167 168 t.prune() 169 170 samples := make([]TimeSeriesSample[T], 0, t.samples.Len()) 171 for e := t.samples.Front(); e != nil; e = e.Next() { 172 samples = append(samples, e.Value.(TimeSeriesSample[T])) 173 } 174 return samples 175 } 176 177 func (t *TimeSeries[T]) GetSamplesAfter(at time.Time) []TimeSeriesSample[T] { 178 t.lock.Lock() 179 defer t.lock.Unlock() 180 181 t.prune() 182 183 samples := make([]TimeSeriesSample[T], 0, t.samples.Len()) 184 for e := t.samples.Front(); e != nil; e = e.Next() { 185 s := e.Value.(TimeSeriesSample[T]) 186 if s.At.After(at) { 187 samples = append(samples, s) 188 } 189 } 190 return samples 191 } 192 193 func (t *TimeSeries[T]) ClearSamples() { 194 t.lock.Lock() 195 defer t.lock.Unlock() 196 197 t.initSamples() 198 } 199 200 func (t *TimeSeries[T]) Sum() float64 { 201 t.lock.Lock() 202 defer t.lock.Unlock() 203 204 t.prune() 205 206 sum := float64(0.0) 207 for e := t.samples.Front(); e != nil; e = e.Next() { 208 s := e.Value.(TimeSeriesSample[T]) 209 sum += float64(s.Value) 210 } 211 212 return sum 213 } 214 215 func (t *TimeSeries[T]) Min() T { 216 t.lock.Lock() 217 defer t.lock.Unlock() 218 219 t.prune() 220 221 return t.minLocked(t.samples.Len()) 222 } 223 224 func (t *TimeSeries[T]) minLocked(numSamples int) T { 225 min := T(0) 226 for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 { 227 s := e.Value.(TimeSeriesSample[T]) 228 if min == T(0) || min > s.Value { 229 min = s.Value 230 } 231 } 232 233 return min 234 } 235 236 func (t *TimeSeries[T]) Max() T { 237 t.lock.Lock() 238 defer t.lock.Unlock() 239 240 t.prune() 241 242 return t.maxLocked(t.samples.Len()) 243 } 244 245 func (t *TimeSeries[T]) maxLocked(numSamples int) T { 246 max := T(0) 247 for e, samplesSeen := t.samples.Back(), 0; e != nil && samplesSeen < numSamples; e, samplesSeen = e.Prev(), samplesSeen+1 { 248 s := e.Value.(TimeSeriesSample[T]) 249 if max < s.Value { 250 max = s.Value 251 } 252 } 253 254 return max 255 } 256 257 func (t *TimeSeries[T]) CurrentRun(threshold T, op TimeSeriesCompareOp) time.Duration { 258 t.lock.Lock() 259 defer t.lock.Unlock() 260 261 t.prune() 262 263 start := time.Time{} 264 end := time.Time{} 265 266 for e := t.samples.Back(); e != nil; e = e.Prev() { 267 cond := false 268 s := e.Value.(TimeSeriesSample[T]) 269 switch op { 270 case TimeSeriesCompareOpEQ: 271 cond = s.Value == threshold 272 case TimeSeriesCompareOpNE: 273 cond = s.Value != threshold 274 case TimeSeriesCompareOpGT: 275 cond = s.Value > threshold 276 case TimeSeriesCompareOpGTE: 277 cond = s.Value >= threshold 278 case TimeSeriesCompareOpLT: 279 cond = s.Value < threshold 280 case TimeSeriesCompareOpLTE: 281 cond = s.Value <= threshold 282 } 283 if !cond { 284 break 285 } 286 if end.IsZero() { 287 end = s.At 288 } 289 start = s.At 290 } 291 292 if end.IsZero() || start.IsZero() { 293 return 0 294 } 295 296 return end.Sub(start) 297 } 298 299 func (t *TimeSeries[T]) OnlineAverage() float64 { 300 t.lock.RLock() 301 defer t.lock.RUnlock() 302 303 return t.welfordM 304 } 305 306 func (t *TimeSeries[T]) OnlineVariance() float64 { 307 t.lock.RLock() 308 defer t.lock.RUnlock() 309 310 return t.onlineVarianceLocked() 311 } 312 313 func (t *TimeSeries[T]) onlineVarianceLocked() float64 { 314 if t.welfordCount > 1 { 315 return t.welfordS / float64(t.welfordCount-1) 316 } 317 318 return 0.0 319 } 320 321 func (t *TimeSeries[T]) OnlineStdDev() float64 { 322 t.lock.RLock() 323 defer t.lock.RUnlock() 324 325 return t.onlineStdDevLocked() 326 } 327 328 func (t *TimeSeries[T]) onlineStdDevLocked() float64 { 329 return math.Sqrt(t.onlineVarianceLocked()) 330 } 331 332 func (t *TimeSeries[T]) ZScore(val T) float64 { 333 t.lock.RLock() 334 defer t.lock.RUnlock() 335 336 onlineStdDev := t.onlineStdDevLocked() 337 if onlineStdDev != 0.0 { 338 return (float64(val) - t.welfordM) / t.onlineStdDevLocked() 339 } 340 341 return 0.0 342 } 343 344 func (t *TimeSeries[T]) Slope() float64 { 345 t.lock.Lock() 346 defer t.lock.Unlock() 347 348 t.prune() 349 350 numSamples := t.samples.Len() 351 slope, _, _, _ := t.linearFitLocked(numSamples) 352 353 // convert to angle to normalize between -90deg to +90deg 354 return math.Atan(slope) * 180 / math.Pi 355 } 356 357 func (t *TimeSeries[T]) linearFitLocked(numSamples int) (slope float64, intercept float64, startedAt time.Time, endedAt time.Time) { 358 // go back numSamples first 359 e := t.samples.Back() 360 for i := 1; i < numSamples && e != nil; i++ { 361 e = e.Prev() 362 } 363 364 if e == nil { 365 // not enough samples 366 return 367 } 368 369 sx := float64(0.0) 370 sxsq := float64(0.0) 371 sy := float64(0.0) 372 sysq := float64(0.0) 373 sxy := float64(0.0) 374 375 for ; e != nil; e = e.Next() { 376 s := e.Value.(TimeSeriesSample[T]) 377 if startedAt.IsZero() { 378 startedAt = s.At 379 } 380 if endedAt.IsZero() || s.At.After(endedAt) { 381 endedAt = s.At 382 } 383 384 x := s.At.Sub(startedAt).Seconds() 385 y := float64(s.Value) 386 387 sx += x 388 sxsq += x * x 389 390 sy += y 391 sysq += y * y 392 393 sxy += x * y 394 } 395 396 N := float64(numSamples) 397 sxwsq := sx * sx 398 denom := N*sxsq - sxwsq 399 if denom != 0.0 { 400 slope = (N*sxy - sx*sy) / denom 401 } 402 intercept = (sy - slope*sx) / N 403 return 404 } 405 406 func (t *TimeSeries[T]) LinearExtrapolateTo(numSamplesToUse int, after time.Duration) (float64, error) { 407 t.lock.Lock() 408 defer t.lock.Unlock() 409 410 t.prune() 411 412 slope, intercept, startedAt, endedAt := t.linearFitLocked(numSamplesToUse) 413 if startedAt.IsZero() { 414 return 0, errNotEnoughSamples 415 } 416 417 x := endedAt.Add(after).Sub(startedAt).Seconds() 418 y := slope*x + intercept 419 return y, nil 420 } 421 422 func (t *TimeSeries[T]) KendallsTau(numSamplesToUse int) (float64, error) { 423 t.lock.Lock() 424 t.prune() 425 426 if t.samples.Len() < numSamplesToUse { 427 t.lock.Unlock() 428 return 0.0, errNotEnoughSamples 429 } 430 431 values := make([]T, numSamplesToUse) 432 idx := numSamplesToUse - 1 433 for e := t.samples.Back(); e != nil; e = e.Prev() { 434 if idx < 0 { 435 break 436 } 437 438 s := e.Value.(TimeSeriesSample[T]) 439 values[idx] = s.Value 440 idx-- 441 } 442 t.lock.Unlock() 443 444 concordantPairs := 0 445 discordantPairs := 0 446 for i := 0; i < len(values)-1; i++ { 447 for j := i + 1; j < len(values); j++ { 448 if values[i] < values[j] { 449 concordantPairs++ 450 } else if values[i] > values[j] { 451 discordantPairs++ 452 } 453 } 454 } 455 456 if (concordantPairs + discordantPairs) == 0 { 457 return 0.0, nil 458 } 459 460 return (float64(concordantPairs) - float64(discordantPairs)) / (float64(concordantPairs) + float64(discordantPairs)), nil 461 } 462 463 func (t *TimeSeries[T]) initSamples() { 464 t.samples = t.samples.Init() 465 } 466 467 func (t *TimeSeries[T]) addSampleAt(val T, at time.Time) { 468 // insert in time order 469 e := t.samples.Back() 470 if e != nil { 471 lastSample := e.Value.(TimeSeriesSample[T]) 472 if val == lastSample.Value && at.Sub(lastSample.At) < t.params.CollapseDuration { 473 // repeated value within collapse duration 474 t.prune() 475 return 476 } 477 } 478 for e = t.samples.Back(); e != nil; e = e.Prev() { 479 s := e.Value.(TimeSeriesSample[T]) 480 if at.After(s.At) { 481 break 482 } 483 } 484 485 sample := TimeSeriesSample[T]{ 486 Value: val, 487 At: at, 488 } 489 switch { 490 case e != nil: // in the middle 491 t.samples.InsertAfter(sample, e) 492 493 case t.samples.Front() != nil: // in the front 494 t.samples.PushFront(sample) 495 496 default: // at the end 497 t.samples.PushBack(sample) 498 } 499 500 t.updateWelfordStats(val, at) 501 502 t.prune() 503 } 504 505 func (t *TimeSeries[T]) updateWelfordStats(val T, at time.Time) { 506 t.welfordCount++ 507 mLast := t.welfordM 508 t.welfordM += (float64(val) - t.welfordM) / float64(t.welfordCount) 509 t.welfordS += (float64(val) - mLast) * (float64(val) - t.welfordM) 510 511 if t.welfordStart.IsZero() { 512 t.welfordStart = at 513 } 514 t.welfordLast = at 515 } 516 517 func (t *TimeSeries[T]) prune() { 518 thresh := t.welfordLast.Add(-t.params.Window) 519 //thresh := time.Now().Add(-t.params.Window) 520 521 toRemove := make([]*list.Element, 0, t.samples.Len()) 522 for e := t.samples.Front(); e != nil; e = e.Next() { 523 s := e.Value.(TimeSeriesSample[T]) 524 if s.At.After(thresh) { 525 break 526 } 527 528 toRemove = append(toRemove, e) 529 } 530 531 for _, e := range toRemove { 532 t.samples.Remove(e) 533 } 534 } 535 536 // TODO - a bunch of stats 537 // - sum 538 // - moving average 539 // - EWMA 540 // - min 541 // - max 542 // - average 543 // - median 544 // - variance 545 // - stddev 546 // - trend 547 // - run 548 // - z-score