github.com/benz9527/xboot@v0.0.0-20240504061247-c23f15593274/timer/x_timing_wheels_v1.go

package timer

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"runtime"
	"runtime/debug"
	"strconv"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/panjf2000/ants/v2"

	"github.com/benz9527/xboot/lib/hrtime"
	"github.com/benz9527/xboot/lib/id"
	"github.com/benz9527/xboot/lib/infra"
	"github.com/benz9527/xboot/lib/kv"
	"github.com/benz9527/xboot/lib/queue"
)

var (
	_ TimingWheel  = (*timingWheel)(nil)
	_ TimingWheels = (*xTimingWheels)(nil)
)

type timingWheel struct {
	slots []TimingWheelSlot // In Kafka these are the buckets.
	// ctx is used to shut down the timing wheel and to pass
	// values that control debug info.
	ctx              context.Context
	globalDqRef      queue.DelayQueue[TimingWheelSlot]
	overflowWheelRef unsafe.Pointer // Same as Kafka's TimingWheel (*timingWheel).
	tickMs           int64
	startMs          int64 // Baseline startup timestamp.
	interval         int64
	currentTimeMs    int64
	slotSize         int64 // In Kafka this is the wheelSize.
	globalStats      *xTimingWheelsStats
	clock            hrtime.Clock
}

func (tw *timingWheel) GetTickMs() int64 {
	return atomic.LoadInt64(&tw.tickMs)
}

func (tw *timingWheel) GetStartMs() int64 {
	return atomic.LoadInt64(&tw.startMs)
}

func (tw *timingWheel) GetCurrentTimeMs() int64 {
	return atomic.LoadInt64(&tw.currentTimeMs)
}

func (tw *timingWheel) GetInterval() int64 {
	return atomic.LoadInt64(&tw.interval)
}

func (tw *timingWheel) GetSlotSize() int64 {
	return atomic.LoadInt64(&tw.slotSize)
}

func (tw *timingWheel) getOverflowTimingWheel() TimingWheel {
	return *(*TimingWheel)(atomic.LoadPointer(&tw.overflowWheelRef))
}

func (tw *timingWheel) setOverflowTimingWheel(oftw TimingWheel) {
	atomic.StorePointer(&tw.overflowWheelRef, unsafe.Pointer(&oftw))
}
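
// Illustration (not part of the original source): how the per-level tick and
// interval grow when overflow wheels are created. In addTask below, a new
// overflow wheel is constructed with this wheel's interval as its tick, so each
// level covers slotSize times the range of the level beneath it. The tick and
// slot size used here are assumptions chosen only for the example.
func exampleWheelLevelGrowth() {
	tickMs, slotSize := int64(1), int64(20) // Assumed level-0 configuration.
	for level := 0; level < 3; level++ {
		interval := tickMs * slotSize
		fmt.Printf("level %d: tickMs=%d, interval=%dms\n", level, tickMs, interval)
		// The next (overflow) wheel reuses this wheel's interval as its tick:
		// 1ms/20ms at level 0, 20ms/400ms at level 1, 400ms/8000ms at level 2.
		tickMs = interval
	}
}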

// Related to slot level upgrade and downgrade: advancing this wheel also
// advances its overflow wheel.
func (tw *timingWheel) advanceClock(slotExpiredMs int64) {
	currentTimeMs := tw.GetCurrentTimeMs()
	tickMs := tw.GetTickMs()
	if slotExpiredMs >= currentTimeMs+tickMs {
		currentTimeMs = slotExpiredMs - (slotExpiredMs % tickMs) // Truncate the remainder: align to the slot's expiredMs left boundary.
		atomic.StoreInt64(&tw.currentTimeMs, currentTimeMs)      // Update the current time.
		oftw := tw.getOverflowTimingWheel()
		if oftw != nil {
			oftw.(*timingWheel).advanceClock(currentTimeMs)
		}
	}
}

func (tw *timingWheel) addTask(task Task, level int64) error {
	if len(task.GetJobID()) <= 0 {
		return ErrTimingWheelTaskEmptyJobID
	}
	if task.GetJob() == nil {
		return ErrTimingWheelEmptyJob
	}
	if task.Cancelled() {
		return ErrTimingWheelTaskCancelled
	}

	taskExpiredMs := task.GetExpiredMs()
	currentTimeMs := tw.clock.NowInDefaultTZ().UnixMilli()
	tickMs := tw.GetTickMs()
	interval := tw.GetInterval()
	slotSize := tw.GetSlotSize()
	diff := taskExpiredMs - currentTimeMs

	if level == 0 && diff <= tickMs {
		task.setSlot(immediateExpiredSlot)
		return ErrTimingWheelTaskIsExpired
	}
	if diff > tickMs && diff < interval {
		virtualID := taskExpiredMs / tickMs
		slotID := virtualID % slotSize
		slot := tw.slots[slotID]
		if slot.GetExpirationMs() == virtualID*tickMs {
			if err := slot.AddTask(task); errors.Is(err, ErrTimingWheelTaskUnableToBeAddedToSlot) {
				task.setSlot(immediateExpiredSlot)
				return ErrTimingWheelTaskIsExpired
			} else if err != nil {
				return err
			}
		} else {
			if slot.setExpirationMs(virtualID * tickMs) {
				slot.setSlotID(slotID)
				slot.setLevel(level)
				if err := slot.AddTask(task); err != nil {
					return err
				}
				tw.globalDqRef.Offer(slot, slot.GetExpirationMs())
			}
		}
		return nil
	}
	// Out of this wheel's interval. Put the task into the overflow (higher-interval) timing wheel.
	if ovf := tw.getOverflowTimingWheel(); ovf == nil {
		tw.setOverflowTimingWheel(newTimingWheel(
			tw.ctx,
			interval,
			slotSize,
			currentTimeMs,
			tw.globalStats,
			tw.globalDqRef,
			tw.clock,
		))
	}
	// Tail-recursive call; the previous stack frame can be freed.
	return tw.getOverflowTimingWheel().(*timingWheel).addTask(task, level+1)
}

func newTimingWheel(
	ctx context.Context,
	tickMs int64,
	slotSize int64,
	startMs int64,
	stats *xTimingWheelsStats,
	dq queue.DelayQueue[TimingWheelSlot],
	clock hrtime.Clock,
) TimingWheel {
	tw := &timingWheel{
		ctx:           ctx,
		tickMs:        tickMs,
		startMs:       startMs,
		slotSize:      slotSize,
		globalStats:   stats,
		interval:      tickMs * slotSize,            // Pay attention to integer overflow.
		currentTimeMs: startMs - (startMs % tickMs), // Truncate the remainder: align to the startMs left boundary.
		slots:         make([]TimingWheelSlot, slotSize),
		globalDqRef:   dq,
		clock:         clock,
	}
	// Slots are initialized as doubly linked lists.
	for i := int64(0); i < slotSize; i++ {
		tw.slots[i] = NewXSlot()
	}
	tw.globalStats.RecordSlotCount(slotSize)
	tw.setOverflowTimingWheel(nil)
	return tw
}
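
// Illustration (not part of the original source): the slot placement arithmetic
// used by addTask above. A task's absolute expiration is truncated to a tick
// boundary via virtualID, and the slot index is virtualID modulo the slot size.
// The numbers below are assumptions chosen only to show the calculation.
func exampleSlotPlacement() {
	tickMs, slotSize := int64(1), int64(20) // Assumed wheel configuration.
	currentTimeMs := int64(1000)            // Assumed "now" in milliseconds.
	taskExpiredMs := currentTimeMs + 7      // Expires 7ms from now, inside the 20ms interval.

	virtualID := taskExpiredMs / tickMs    // 1007: tick-aligned bucket id.
	slotID := virtualID % slotSize         // 1007 % 20 = 7: index into tw.slots.
	slotExpirationMs := virtualID * tickMs // 1007: left boundary offered to the delay queue.
	fmt.Println(virtualID, slotID, slotExpirationMs)
}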

const (
	disableTimingWheelsSchedulePoll       = "disableTWSPoll"
	disableTimingWheelsScheduleCancelTask = "disableTWSCancelTask"
)

type TimingWheelTimeSourceEnum int8

type xTimingWheels struct {
	tw           TimingWheel
	ctx          context.Context
	dq           queue.DelayQueue[TimingWheelSlot] // Do not use the timer.Ticker
	tasksMap     kv.ThreadSafeStorer[JobID, Task]
	stopC        chan struct{}
	expiredSlotC infra.ClosableChannel[TimingWheelSlot]
	twEventC     infra.ClosableChannel[*timingWheelEvent]
	twEventPool  *timingWheelEventsPool
	gPool        *ants.Pool
	stats        *xTimingWheelsStats
	isRunning    *atomic.Bool
	clock        hrtime.Clock
	idGenerator  id.Gen
	name         string
}

func (xtw *xTimingWheels) GetTickMs() int64 {
	return xtw.tw.GetTickMs()
}

func (xtw *xTimingWheels) GetStartMs() int64 {
	return xtw.tw.GetStartMs()
}

func (xtw *xTimingWheels) Shutdown() {
	if xtw == nil {
		return
	}
	if old := xtw.isRunning.Swap(false); !old {
		slog.Warn("[x-timing-wheels] timing wheel is already shutdown")
		return
	}
	xtw.isRunning.Store(false)

	close(xtw.stopC)
	_ = xtw.expiredSlotC.Close()
	_ = xtw.twEventC.Close()
	xtw.gPool.Release()

	runtime.SetFinalizer(xtw, func(xtw *xTimingWheels) {
		xtw.dq = nil
		_ = xtw.tasksMap.Purge()
	})
}

func (xtw *xTimingWheels) AddTask(task Task) error {
	if len(task.GetJobID()) <= 0 {
		return ErrTimingWheelTaskEmptyJobID
	}
	if task.GetJob() == nil {
		return ErrTimingWheelEmptyJob
	}
	if !xtw.isRunning.Load() {
		return ErrTimingWheelStopped
	}
	event := xtw.twEventPool.Get()
	event.AddTask(task)
	return xtw.twEventC.Send(event)
}

func (xtw *xTimingWheels) AfterFunc(delayMs time.Duration, fn Job) (Task, error) {
	if delayMs.Milliseconds() < xtw.GetTickMs() {
		return nil, infra.WrapErrorStackWithMessage(ErrTimingWheelTaskTooShortExpiration, "[x-timing-wheels] delay ms "+strconv.FormatInt(delayMs.Milliseconds(), 10)+
			" is less than tick ms "+strconv.FormatInt(xtw.GetTickMs(), 10))
	}
	if fn == nil {
		return nil, infra.WrapErrorStack(ErrTimingWheelEmptyJob)
	}

	var now = xtw.clock.NowInDefaultTZ()
	task := NewOnceTask(
		xtw.ctx,
		JobID(strconv.FormatUint(xtw.idGenerator(), 10)),
		now.Add(delayMs).UnixMilli(),
		fn,
	)

	if !xtw.isRunning.Load() {
		return nil, infra.WrapErrorStack(ErrTimingWheelStopped)
	}
	if err := xtw.AddTask(task); err != nil {
		return nil, infra.WrapErrorStack(err)
	}
	return task, nil
}

func (xtw *xTimingWheels) ScheduleFunc(schedFn func() Scheduler, fn Job) (Task, error) {
	if schedFn == nil {
		return nil, infra.WrapErrorStack(ErrTimingWheelUnknownScheduler)
	}
	if fn == nil {
		return nil, infra.WrapErrorStack(ErrTimingWheelEmptyJob)
	}

	var now = xtw.clock.NowInDefaultTZ()
	task := NewRepeatTask(
		xtw.ctx,
		JobID(fmt.Sprintf("%v", xtw.idGenerator())),
		now.UnixMilli(), schedFn(),
		fn,
	)

	if !xtw.isRunning.Load() {
		return nil, infra.WrapErrorStack(ErrTimingWheelStopped)
	}
	if err := xtw.AddTask(task); err != nil {
		return nil, infra.WrapErrorStack(err)
	}
	return task, nil
}
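
// Illustration (not part of the original source): the guard at the top of
// AfterFunc above. A delay is accepted only when its millisecond value is at
// least one tick; the 10ms tick below is an assumption for the example, the
// real value comes from the timing wheels options.
func exampleAfterFuncDelayGuard() {
	const assumedTickMs = int64(10)
	for _, delay := range []time.Duration{5 * time.Millisecond, 50 * time.Millisecond} {
		accepted := delay.Milliseconds() >= assumedTickMs
		fmt.Printf("delay=%s accepted=%t\n", delay, accepted) // 5ms is rejected, 50ms is accepted.
	}
}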

func (xtw *xTimingWheels) CancelTask(jobID JobID) error {
	if len(jobID) <= 0 {
		return infra.WrapErrorStack(ErrTimingWheelTaskEmptyJobID)
	}

	if !xtw.isRunning.Load() {
		return infra.WrapErrorStack(ErrTimingWheelStopped)
	}
	task, ok := xtw.tasksMap.Get(jobID)
	if !ok {
		return infra.WrapErrorStack(ErrTimingWheelTaskNotFound)
	}

	event := xtw.twEventPool.Get()
	event.CancelTaskJobID(task.GetJobID())
	return xtw.twEventC.Send(event)
}

func (xtw *xTimingWheels) schedule(ctx context.Context) {
	if ctx == nil {
		return
	}
	// FIXME: Blocking is mainly caused by producer/consumer speed mismatches and lock data races.
	//  Is there a throttling mechanism that could gradually control the execution timeout timestamps of tasks with different intervals?
	//  Tasks piling up in the same slot will cause the timing wheel to be blocked or delayed.
	_ = xtw.gPool.Submit(func() {
		defer func() {
			if err := recover(); err != nil {
				slog.Error("[x-timing-wheels] event schedule panic recover", "error", err, "stack", debug.Stack())
			}
		}()
		cancelDisabled := ctx.Value(disableTimingWheelsScheduleCancelTask)
		if cancelDisabled == nil {
			cancelDisabled = false
		}
		eventC := xtw.twEventC.Wait()
		slotC := xtw.expiredSlotC.Wait()
		for {
			select {
			case <-ctx.Done():
				xtw.Shutdown()
				return
			case <-xtw.stopC:
				return
			default:
				if xtw.twEventC.IsClosed() {
					slog.Warn("[x-timing-wheels] event channel has been closed")
					return
				}
				if xtw.expiredSlotC.IsClosed() {
					slog.Warn("[x-timing-wheels] slot channel has been closed")
					return
				}
			}

			select {
			case slot := <-slotC:
				xtw.advanceClock(slot.GetExpirationMs())
				// Related to slot level upgrade and downgrade.
				if slot != nil && slot.GetExpirationMs() > slotHasBeenFlushedMs {
					xtw.stats.UpdateSlotActiveCount(xtw.dq.Len())
					// Reset the slot, ready for the next round.
					slot.setExpirationMs(slotHasBeenFlushedMs)
					slot.Flush(xtw.handleTask)
				}
			case event := <-eventC:
				switch op := event.GetOperation(); op {
				case addTask, reAddTask:
					task, ok := event.GetTask()
					if !ok {
						goto recycle
					}
					if err := xtw.addTask(task); errors.Is(err, ErrTimingWheelTaskIsExpired) {
						// Avoid data race.
						xtw.handleTask(task)
					}
					if op == addTask {
						xtw.stats.RecordJobAliveCount(1)
					}
				case cancelTask:
					jobID, ok := event.GetCancelTaskJobID()
					if !ok || cancelDisabled.(bool) {
						goto recycle
					}
					// Avoid data race.
					_ = xtw.cancelTask(jobID)
				case unknown:
					fallthrough
				default:
				}
			recycle:
				xtw.twEventPool.Put(event)
			}
		}
	})
	_ = xtw.gPool.Submit(func() {
		func(disabled any) {
			if disabled != nil && disabled.(bool) {
				slog.Warn("[x-timing-wheels] delay queue poll disabled")
				return
			}
			defer func() {
				if err := recover(); err != nil {
					slog.Error("[x-timing-wheels] poll schedule panic recover", "error", err, "stack", debug.Stack())
				}
				slog.Warn("[x-timing-wheels] delay queue exit")
			}()
			xtw.dq.PollToChan(func() int64 {
				return xtw.clock.NowInDefaultTZ().UnixMilli()
			}, xtw.expiredSlotC)
		}(ctx.Value(disableTimingWheelsSchedulePoll))
	})
	xtw.isRunning.Store(true)
}
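
// Illustration (not part of the original source): schedule above reads two
// debug switches from the context via ctx.Value. A test that wants to feed
// expired slots manually, or to keep cancel events from being consumed, could
// prepare its context like this before calling NewXTimingWheels. The keys are
// plain strings, so go vet may warn about the context key type.
func exampleDebugContext(parent context.Context) context.Context {
	ctx := context.WithValue(parent, disableTimingWheelsSchedulePoll, true)   // Skip the delay-queue poll goroutine.
	ctx = context.WithValue(ctx, disableTimingWheelsScheduleCancelTask, true) // Drop cancelTask events without applying them.
	return ctx
}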

// advanceClock updates every wheel's current time so that time appears to be
// continuously incremented. Related to slot level upgrade and downgrade.
func (xtw *xTimingWheels) advanceClock(timeoutMs int64) {
	xtw.tw.(*timingWheel).advanceClock(timeoutMs)
}

func (xtw *xTimingWheels) addTask(task Task) error {
	if task == nil || task.Cancelled() || !xtw.isRunning.Load() {
		return ErrTimingWheelStopped
	}
	err := xtw.tw.(*timingWheel).addTask(task, 0)
	if err == nil || errors.Is(err, ErrTimingWheelTaskIsExpired) {
		xtw.tasksMap.AddOrUpdate(task.GetJobID(), task)
	}
	return infra.WrapErrorStack(err)
}
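
// Illustration (not part of the original source): a plain-data restatement of
// the runNow decision taken by handleTask below, kept only to make the branches
// easier to read. The parameter names are hypothetical.
func exampleRunNowDecision(immediateExpired, hasPrevSlot bool, prevSlotExpMs, prevSlotLevel, taskExpMs, tickMs, nowMs int64) bool {
	runNow := false
	switch {
	case !hasPrevSlot && immediateExpired:
		// The task never reached a slot and was marked as immediately expired.
		runNow = true
	case hasPrevSlot:
		// The previous slot was a sentinel (already flushed), or the task sits at
		// level 0 and its expiration falls within one tick of that slot.
		runNow = prevSlotExpMs == sentinelSlotExpiredMs ||
			(prevSlotLevel == 0 && taskExpMs <= prevSlotExpMs+tickMs)
	}
	// Regardless of the slot metadata, a task whose expiration is already in the past runs now.
	return runNow || taskExpMs <= nowMs
}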

// handleTask is only called for tasks that have been placed in a slot at some
// point and whose slot has already expired.
func (xtw *xTimingWheels) handleTask(t Task) {
	if t == nil || !xtw.isRunning.Load() {
		slog.Warn("[x-timing-wheels] handle task failed",
			"task is nil", t == nil,
			"timing wheel is running", xtw.isRunning.Load(),
		)
		return
	}

	// [slotExpMs, slotExpMs+interval)
	var (
		prevSlotMetadata = t.GetPreviousSlotMetadata()
		slot             = t.GetSlot()
		taskLevel        int64
		runNow           bool
	)
	if prevSlotMetadata == nil && slot != immediateExpiredSlot {
		// Unknown task.
		return
	} else if prevSlotMetadata == nil && slot == immediateExpiredSlot {
		runNow = true
	} else if prevSlotMetadata != nil {
		taskLevel = prevSlotMetadata.GetLevel()
		runNow = prevSlotMetadata.GetExpirationMs() == sentinelSlotExpiredMs
		runNow = runNow || (taskLevel == 0 && t.GetExpiredMs() <= prevSlotMetadata.GetExpirationMs()+xtw.GetTickMs())
	}
	runNow = runNow || t.GetExpiredMs() <= xtw.clock.NowInDefaultTZ().UnixMilli()

	if runNow && !t.Cancelled() {
		job := t.GetJob()
		md := t.GetJobMetadata()
		_ = xtw.gPool.Submit(func() {
			jobStatsWrapper(xtw.stats, job)(xtw.ctx, md)
		})
	} else if t.Cancelled() {
		if slot != nil {
			slot.RemoveTask(t)
		}
		t.setSlot(nil)
		t.setSlotMetadata(nil)
		return
	}

	// Re-add a loop job to the timing wheel.
	// Upgrade or downgrade (move) the task t from one slot to another.
	// Lock free.
	switch t.GetJobType() {
	case OnceJob:
		event := xtw.twEventPool.Get()
		if runNow {
			event.CancelTaskJobID(t.GetJobID())
		} else {
			event.ReAddTask(t)
		}
		_ = xtw.twEventC.Send(event)
	case RepeatedJob:
		var sTask Task
		if !runNow {
			sTask = t
		} else {
			if t.GetRestLoopCount() == 0 {
				event := xtw.twEventPool.Get()
				event.CancelTaskJobID(t.GetJobID())
				_ = xtw.twEventC.Send(event)
				return
			}
			_sTask, ok := t.(ScheduledTask)
			if !ok {
				return
			}
			_sTask.UpdateNextScheduledMs()
			sTask = _sTask
			if sTask.GetExpiredMs() < 0 {
				return
			}
		}
		if sTask != nil {
			event := xtw.twEventPool.Get()
			event.ReAddTask(sTask)
			_ = xtw.twEventC.Send(event)
		}
	}
}

func (xtw *xTimingWheels) cancelTask(jobID JobID) error {
	if !xtw.isRunning.Load() {
		return infra.WrapErrorStack(ErrTimingWheelStopped)
	}

	task, ok := xtw.tasksMap.Get(jobID)
	if !ok {
		return infra.WrapErrorStack(ErrTimingWheelTaskNotFound)
	}

	if task.GetSlot() != nil && !task.GetSlot().RemoveTask(task) {
		return infra.WrapErrorStack(ErrTimingWheelTaskUnableToBeRemoved)
	}

	defer func() {
		xtw.stats.IncreaseJobCancelledCount()
		xtw.stats.RecordJobAliveCount(-1)
	}()

	task.Cancel()

	_, err := xtw.tasksMap.Delete(jobID)
	return infra.WrapErrorStack(err)
}

// NewXTimingWheels creates a new timing wheel.
// As in Kafka, Time.SYSTEM.hiResClockMs() is used.
func NewXTimingWheels(ctx context.Context, opts ...TimingWheelsOption) TimingWheels {
	if ctx == nil {
		return nil
	}

	xtwOpt := &xTimingWheelsOption{}
	for _, o := range opts {
		if o != nil {
			o(xtwOpt)
		}
	}
	xtwOpt.Validate()

	xtw := &xTimingWheels{
		ctx:          ctx,
		stopC:        make(chan struct{}),
		twEventC:     infra.NewSafeClosableChannel[*timingWheelEvent](xtwOpt.getEventBufferSize()),
		expiredSlotC: infra.NewSafeClosableChannel[TimingWheelSlot](xtwOpt.getExpiredSlotBufferSize()),
		tasksMap:     kv.NewThreadSafeMap[JobID, Task](),
		isRunning:    &atomic.Bool{},
		clock:        xtwOpt.getClock(),
		idGenerator:  xtwOpt.getIDGenerator(),
		twEventPool:  newTimingWheelEventsPool(),
		stats:        xtwOpt.getStats(),
		name:         xtwOpt.getName(),
	}
	xtw.isRunning.Store(false)
	if p, err := ants.NewPool(xtwOpt.getWorkerPoolSize(), ants.WithPreAlloc(true)); err != nil {
		panic(err)
	} else {
		xtw.gPool = p
	}
	xtw.dq = queue.NewArrayDelayQueue[TimingWheelSlot](ctx, xtwOpt.defaultDelayQueueCapacity())
	xtw.tw = newTimingWheel(
		ctx,
		xtwOpt.getBasicTickMilliseconds(),
		xtwOpt.getSlotIncrementSize(),
		xtwOpt.getClock().NowInDefaultTZ().UnixMilli(),
		xtw.stats,
		xtw.dq,
		xtw.clock,
	)
	xtw.isRunning.Store(true)
	xtw.schedule(ctx)
	return xtw
}
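
// Usage sketch (illustrative, not part of the original source). The option
// constructors are omitted and the Job signature is an assumption; consult the
// package's option and job definitions for the real names.
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	tw := NewXTimingWheels(ctx /*, tick/slot-size/worker-pool options */)
//	task, err := tw.AfterFunc(200*time.Millisecond, func(ctx context.Context, md JobMetadata) {
//		slog.Info("job fired")
//	})
//	if err == nil {
//		_ = tw.CancelTask(task.GetJobID()) // Cancel before it fires, if desired.
//	}
//	tw.Shutdown()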