github.com/benz9527/xboot@v0.0.0-20240504061247-c23f15593274/timer/x_timing_wheels_v1.go (about)

     1  package timer
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log/slog"
     8  	"runtime"
     9  	"runtime/debug"
    10  	"strconv"
    11  	"sync/atomic"
    12  	"time"
    13  	"unsafe"
    14  
    15  	"github.com/panjf2000/ants/v2"
    16  
    17  	"github.com/benz9527/xboot/lib/hrtime"
    18  	"github.com/benz9527/xboot/lib/id"
    19  	"github.com/benz9527/xboot/lib/infra"
    20  	"github.com/benz9527/xboot/lib/kv"
    21  	"github.com/benz9527/xboot/lib/queue"
    22  )
    23  
    24  var (
    25  	_ TimingWheel  = (*timingWheel)(nil)
    26  	_ TimingWheels = (*xTimingWheels)(nil)
    27  )
    28  
// timingWheel is a single level of the hierarchical timing wheel.
// Tasks that do not fit into this level's interval are delegated to an
// overflow wheel whose tick equals this wheel's interval.
type timingWheel struct {
	slots []TimingWheelSlot // In kafka it is buckets
	// ctx is used to shut down the timing wheel and pass
	// value to control debug info.
	ctx context.Context
	// globalDqRef is the delay queue handed in at construction time; slots
	// that receive a new expiration are offered to it.
	globalDqRef queue.DelayQueue[TimingWheelSlot]
	// overflowWheelRef points at a TimingWheel interface value and is only
	// read and written through atomic pointer operations.
	overflowWheelRef unsafe.Pointer // same as kafka TimingWheel(*timingWheel)
	tickMs           int64
	startMs          int64 // baseline startup timestamp
	// interval is tickMs * slotSize.
	interval int64
	// currentTimeMs is kept truncated to a multiple of tickMs.
	currentTimeMs int64
	slotSize      int64 // in kafka it is wheelSize
	globalStats   *xTimingWheelsStats
	clock         hrtime.Clock
}
    44  
    45  func (tw *timingWheel) GetTickMs() int64 {
    46  	return atomic.LoadInt64(&tw.tickMs)
    47  }
    48  
    49  func (tw *timingWheel) GetStartMs() int64 {
    50  	return atomic.LoadInt64(&tw.startMs)
    51  }
    52  
    53  func (tw *timingWheel) GetCurrentTimeMs() int64 {
    54  	return atomic.LoadInt64(&tw.currentTimeMs)
    55  }
    56  
    57  func (tw *timingWheel) GetInterval() int64 {
    58  	return atomic.LoadInt64(&tw.interval)
    59  }
    60  
    61  func (tw *timingWheel) GetSlotSize() int64 {
    62  	return atomic.LoadInt64(&tw.slotSize)
    63  }
    64  
    65  func (tw *timingWheel) getOverflowTimingWheel() TimingWheel {
    66  	return *(*TimingWheel)(atomic.LoadPointer(&tw.overflowWheelRef))
    67  }
    68  
    69  func (tw *timingWheel) setOverflowTimingWheel(oftw TimingWheel) {
    70  	atomic.StorePointer(&tw.overflowWheelRef, unsafe.Pointer(&oftw))
    71  }
    72  
    73  // Here related to slot level upgrade and downgrade.
    74  func (tw *timingWheel) advanceClock(slotExpiredMs int64) {
    75  	currentTimeMs := tw.GetCurrentTimeMs()
    76  	tickMs := tw.GetTickMs()
    77  	if slotExpiredMs >= currentTimeMs+tickMs {
    78  		currentTimeMs = slotExpiredMs - (slotExpiredMs % tickMs) // truncate the remainder as slot expiredMs left boundary
    79  		atomic.StoreInt64(&tw.currentTimeMs, currentTimeMs)      // update the current time
    80  		oftw := tw.getOverflowTimingWheel()
    81  		if oftw != nil {
    82  			oftw.(*timingWheel).advanceClock(currentTimeMs)
    83  		}
    84  	}
    85  }
    86  
    87  func (tw *timingWheel) addTask(task Task, level int64) error {
    88  	if len(task.GetJobID()) <= 0 {
    89  		return ErrTimingWheelTaskEmptyJobID
    90  	}
    91  	if task.GetJob() == nil {
    92  		return ErrTimingWheelEmptyJob
    93  	}
    94  	if task.Cancelled() {
    95  		return ErrTimingWheelTaskCancelled
    96  	}
    97  
    98  	taskExpiredMs := task.GetExpiredMs()
    99  	currentTimeMs := tw.clock.NowInDefaultTZ().UnixMilli()
   100  	tickMs := tw.GetTickMs()
   101  	interval := tw.GetInterval()
   102  	slotSize := tw.GetSlotSize()
   103  	diff := taskExpiredMs - currentTimeMs
   104  
   105  	if level == 0 && diff <= tickMs {
   106  		task.setSlot(immediateExpiredSlot)
   107  		return ErrTimingWheelTaskIsExpired
   108  	}
   109  	if diff > tickMs && diff < interval {
   110  		virtualID := taskExpiredMs / tickMs
   111  		slotID := virtualID % slotSize
   112  		slot := tw.slots[slotID]
   113  		if slot.GetExpirationMs() == virtualID*tickMs {
   114  			if err := slot.AddTask(task); errors.Is(err, ErrTimingWheelTaskUnableToBeAddedToSlot) {
   115  				task.setSlot(immediateExpiredSlot)
   116  				return ErrTimingWheelTaskIsExpired
   117  			} else if err != nil {
   118  				return err
   119  			}
   120  		} else {
   121  			if slot.setExpirationMs(virtualID * tickMs) {
   122  				slot.setSlotID(slotID)
   123  				slot.setLevel(level)
   124  				if err := slot.AddTask(task); err != nil {
   125  					return err
   126  				}
   127  				tw.globalDqRef.Offer(slot, slot.GetExpirationMs())
   128  			}
   129  		}
   130  		return nil
   131  	}
   132  	// Out of the interval. Put it into the higher interval timing wheel
   133  	if ovf := tw.getOverflowTimingWheel(); ovf == nil {
   134  		tw.setOverflowTimingWheel(newTimingWheel(
   135  			tw.ctx,
   136  			interval,
   137  			slotSize,
   138  			currentTimeMs,
   139  			tw.globalStats,
   140  			tw.globalDqRef,
   141  			tw.clock,
   142  		))
   143  	}
   144  	// Tail recursive call, it will be free the previous stack frame.
   145  	return tw.getOverflowTimingWheel().(*timingWheel).addTask(task, level+1)
   146  }
   147  
   148  func newTimingWheel(
   149  	ctx context.Context,
   150  	tickMs int64,
   151  	slotSize int64,
   152  	startMs int64,
   153  	stats *xTimingWheelsStats,
   154  	dq queue.DelayQueue[TimingWheelSlot],
   155  	clock hrtime.Clock,
   156  ) TimingWheel {
   157  	tw := &timingWheel{
   158  		ctx:           ctx,
   159  		tickMs:        tickMs,
   160  		startMs:       startMs,
   161  		slotSize:      slotSize,
   162  		globalStats:   stats,
   163  		interval:      tickMs * slotSize,            // Pay attention to the overflow
   164  		currentTimeMs: startMs - (startMs % tickMs), // truncate the remainder as startMs left boundary
   165  		slots:         make([]TimingWheelSlot, slotSize),
   166  		globalDqRef:   dq,
   167  		clock:         clock,
   168  	}
   169  	// Slot initializes by doubly linked list.
   170  	for i := int64(0); i < slotSize; i++ {
   171  		tw.slots[i] = NewXSlot()
   172  	}
   173  	tw.globalStats.RecordSlotCount(slotSize)
   174  	tw.setOverflowTimingWheel(nil)
   175  	return tw
   176  }
   177  
   178  const (
   179  	disableTimingWheelsSchedulePoll       = "disableTWSPoll"
   180  	disableTimingWheelsScheduleCancelTask = "disableTWSCancelTask"
   181  )
   182  
// TimingWheelTimeSourceEnum is an int8-based enumeration type.
// NOTE(review): presumably selects the time source of a timing wheel; its
// values are not declared in this part of the file — confirm.
type TimingWheelTimeSourceEnum int8
   184  
// xTimingWheels is the TimingWheels implementation. It owns the root wheel,
// the delay queue of slots, the event loop channels and the worker pool that
// executes jobs.
type xTimingWheels struct {
	// tw is the root (level 0) wheel.
	tw  TimingWheel
	ctx context.Context
	dq  queue.DelayQueue[TimingWheelSlot] // Do not use the timer.Ticker
	// tasksMap tracks tasks by job ID; entries are looked up and deleted on
	// cancellation.
	tasksMap kv.ThreadSafeStorer[JobID, Task]
	// stopC is closed by Shutdown to stop the event loop.
	stopC chan struct{}
	// expiredSlotC receives slots polled out of the delay queue.
	expiredSlotC infra.ClosableChannel[TimingWheelSlot]
	// twEventC carries add / re-add / cancel events to the event loop.
	twEventC    infra.ClosableChannel[*timingWheelEvent]
	twEventPool *timingWheelEventsPool
	// gPool runs the event loop, the delay queue poller and the jobs.
	gPool       *ants.Pool
	stats       *xTimingWheelsStats
	isRunning   *atomic.Bool
	clock       hrtime.Clock
	idGenerator id.Gen
	name        string
}
   201  
   202  func (xtw *xTimingWheels) GetTickMs() int64 {
   203  	return xtw.tw.GetTickMs()
   204  }
   205  
   206  func (xtw *xTimingWheels) GetStartMs() int64 {
   207  	return xtw.tw.GetStartMs()
   208  }
   209  
   210  func (xtw *xTimingWheels) Shutdown() {
   211  	if xtw == nil {
   212  		return
   213  	}
   214  	if old := xtw.isRunning.Swap(false); !old {
   215  		slog.Warn("[x-timing-wheels] timing wheel is already shutdown")
   216  		return
   217  	}
   218  	xtw.isRunning.Store(false)
   219  
   220  	close(xtw.stopC)
   221  	_ = xtw.expiredSlotC.Close()
   222  	_ = xtw.twEventC.Close()
   223  	xtw.gPool.Release()
   224  
   225  	runtime.SetFinalizer(xtw, func(xtw *xTimingWheels) {
   226  		xtw.dq = nil
   227  		_ = xtw.tasksMap.Purge()
   228  	})
   229  }
   230  
   231  func (xtw *xTimingWheels) AddTask(task Task) error {
   232  	if len(task.GetJobID()) <= 0 {
   233  		return ErrTimingWheelTaskEmptyJobID
   234  	}
   235  	if task.GetJob() == nil {
   236  		return ErrTimingWheelEmptyJob
   237  	}
   238  	if !xtw.isRunning.Load() {
   239  		return ErrTimingWheelStopped
   240  	}
   241  	event := xtw.twEventPool.Get()
   242  	event.AddTask(task)
   243  	return xtw.twEventC.Send(event)
   244  }
   245  
   246  func (xtw *xTimingWheels) AfterFunc(delayMs time.Duration, fn Job) (Task, error) {
   247  	if delayMs.Milliseconds() < xtw.GetTickMs() {
   248  		return nil, infra.WrapErrorStackWithMessage(ErrTimingWheelTaskTooShortExpiration, "[x-timing-wheels] delay ms "+strconv.FormatInt(delayMs.Milliseconds(), 10)+
   249  			" is less than tick ms "+strconv.FormatInt(xtw.GetTickMs(), 10))
   250  	}
   251  	if fn == nil {
   252  		return nil, infra.WrapErrorStack(ErrTimingWheelEmptyJob)
   253  	}
   254  
   255  	var now = xtw.clock.NowInDefaultTZ()
   256  	task := NewOnceTask(
   257  		xtw.ctx,
   258  		JobID(strconv.FormatUint(xtw.idGenerator(), 10)),
   259  		now.Add(delayMs).UnixMilli(),
   260  		fn,
   261  	)
   262  
   263  	if !xtw.isRunning.Load() {
   264  		return nil, infra.WrapErrorStack(ErrTimingWheelStopped)
   265  	}
   266  	if err := xtw.AddTask(task); err != nil {
   267  		return nil, infra.WrapErrorStack(err)
   268  	}
   269  	return task, nil
   270  }
   271  
   272  func (xtw *xTimingWheels) ScheduleFunc(schedFn func() Scheduler, fn Job) (Task, error) {
   273  	if schedFn == nil {
   274  		return nil, infra.WrapErrorStack(ErrTimingWheelUnknownScheduler)
   275  	}
   276  	if fn == nil {
   277  		return nil, infra.WrapErrorStack(ErrTimingWheelEmptyJob)
   278  	}
   279  
   280  	var now = xtw.clock.NowInDefaultTZ()
   281  	task := NewRepeatTask(
   282  		xtw.ctx,
   283  		JobID(fmt.Sprintf("%v", xtw.idGenerator())),
   284  		now.UnixMilli(), schedFn(),
   285  		fn,
   286  	)
   287  
   288  	if !xtw.isRunning.Load() {
   289  		return nil, infra.WrapErrorStack(ErrTimingWheelStopped)
   290  	}
   291  	if err := xtw.AddTask(task); err != nil {
   292  		return nil, infra.WrapErrorStack(err)
   293  	}
   294  	return task, nil
   295  }
   296  
   297  func (xtw *xTimingWheels) CancelTask(jobID JobID) error {
   298  	if len(jobID) <= 0 {
   299  		return infra.WrapErrorStack(ErrTimingWheelTaskEmptyJobID)
   300  	}
   301  
   302  	if xtw.isRunning.Load() {
   303  		return infra.WrapErrorStack(ErrTimingWheelStopped)
   304  	}
   305  	task, ok := xtw.tasksMap.Get(jobID)
   306  	if !ok {
   307  		return infra.WrapErrorStack(ErrTimingWheelTaskNotFound)
   308  	}
   309  
   310  	event := xtw.twEventPool.Get()
   311  	event.CancelTaskJobID(task.GetJobID())
   312  	return xtw.twEventC.Send(event)
   313  }
   314  
   315  func (xtw *xTimingWheels) schedule(ctx context.Context) {
   316  	if ctx == nil {
   317  		return
   318  	}
   319  	// FIXME Block error mainly caused by producer and consumer speed mismatch, lock data race.
   320  	//  Is there any limitation mechanism could gradually  control different interval task‘s execution timeout timestamp?
   321  	//  Tasks piling up in the same slot will cause the timing wheel to be blocked or delayed.
   322  	_ = xtw.gPool.Submit(func() {
   323  		defer func() {
   324  			if err := recover(); err != nil {
   325  				slog.Error("[x-timing-wheels] event schedule panic recover", "error", err, "stack", debug.Stack())
   326  			}
   327  		}()
   328  		cancelDisabled := ctx.Value(disableTimingWheelsScheduleCancelTask)
   329  		if cancelDisabled == nil {
   330  			cancelDisabled = false
   331  		}
   332  		eventC := xtw.twEventC.Wait()
   333  		slotC := xtw.expiredSlotC.Wait()
   334  		for {
   335  			select {
   336  			case <-ctx.Done():
   337  				xtw.Shutdown()
   338  				return
   339  			case <-xtw.stopC:
   340  				return
   341  			default:
   342  				if xtw.twEventC.IsClosed() {
   343  					slog.Warn("[x-timing-wheels] event channel has been closed")
   344  					return
   345  				}
   346  				if xtw.expiredSlotC.IsClosed() {
   347  					slog.Warn("[x-timing-wheels] slot channel has been closed")
   348  					return
   349  				}
   350  			}
   351  
   352  			select {
   353  			case slot := <-slotC:
   354  				xtw.advanceClock(slot.GetExpirationMs())
   355  				// Here related to slot level upgrade and downgrade.
   356  				if slot != nil && slot.GetExpirationMs() > slotHasBeenFlushedMs {
   357  					xtw.stats.UpdateSlotActiveCount(xtw.dq.Len())
   358  					// Reset the slot, ready for the next round.
   359  					slot.setExpirationMs(slotHasBeenFlushedMs)
   360  					slot.Flush(xtw.handleTask)
   361  				}
   362  			case event := <-eventC:
   363  				switch op := event.GetOperation(); op {
   364  				case addTask, reAddTask:
   365  					task, ok := event.GetTask()
   366  					if !ok {
   367  						goto recycle
   368  					}
   369  					if err := xtw.addTask(task); errors.Is(err, ErrTimingWheelTaskIsExpired) {
   370  						// Avoid data race.
   371  						xtw.handleTask(task)
   372  					}
   373  					if op == addTask {
   374  						xtw.stats.RecordJobAliveCount(1)
   375  					}
   376  				case cancelTask:
   377  					jobID, ok := event.GetCancelTaskJobID()
   378  					if !ok || cancelDisabled.(bool) {
   379  						goto recycle
   380  					}
   381  					// Avoid data race
   382  					_ = xtw.cancelTask(jobID)
   383  				case unknown:
   384  					fallthrough
   385  				default:
   386  
   387  				}
   388  			recycle:
   389  				xtw.twEventPool.Put(event)
   390  			}
   391  		}
   392  	})
   393  	_ = xtw.gPool.Submit(func() {
   394  		func(disabled any) {
   395  			if disabled != nil && disabled.(bool) {
   396  				slog.Warn("[x-timing-wheels] delay queue poll disabled")
   397  				return
   398  			}
   399  			defer func() {
   400  				if err := recover(); err != nil {
   401  					slog.Error("[x-timing-wheels] poll schedule panic recover", "error", err, "stack", debug.Stack())
   402  				}
   403  				slog.Warn("[x-timing-wheels] delay queue exit")
   404  			}()
   405  			xtw.dq.PollToChan(func() int64 {
   406  				return xtw.clock.NowInDefaultTZ().UnixMilli()
   407  			}, xtw.expiredSlotC)
   408  		}(ctx.Value(disableTimingWheelsSchedulePoll))
   409  	})
   410  	xtw.isRunning.Store(true)
   411  }
   412  
   413  // Update all wheels' current time, in order to simulate the time is continuously incremented.
   414  // Here related to slot level upgrade and downgrade.
   415  func (xtw *xTimingWheels) advanceClock(timeoutMs int64) {
   416  	xtw.tw.(*timingWheel).advanceClock(timeoutMs)
   417  }
   418  
   419  func (xtw *xTimingWheels) addTask(task Task) error {
   420  	if task == nil || task.Cancelled() || !xtw.isRunning.Load() {
   421  		return ErrTimingWheelStopped
   422  	}
   423  	err := xtw.tw.(*timingWheel).addTask(task, 0)
   424  	if err == nil || errors.Is(err, ErrTimingWheelTaskIsExpired) {
   425  		xtw.tasksMap.AddOrUpdate(task.GetJobID(), task)
   426  	}
   427  	return infra.WrapErrorStack(err)
   428  }
   429  
   430  // handleTask all tasks which are called by this method
   431  // will mean that the task must be in a slot ever and related slot
   432  // has been expired.
   433  func (xtw *xTimingWheels) handleTask(t Task) {
   434  	if t == nil || !xtw.isRunning.Load() {
   435  		slog.Warn("[x-timing-wheels] handle task failed",
   436  			"task is nil", t == nil,
   437  			"timing wheel is running", xtw.isRunning.Load(),
   438  		)
   439  		return
   440  	}
   441  
   442  	// [slotExpMs, slotExpMs+interval)
   443  	var (
   444  		prevSlotMetadata = t.GetPreviousSlotMetadata()
   445  		slot             = t.GetSlot()
   446  		taskLevel        int64
   447  		runNow           bool
   448  	)
   449  	if prevSlotMetadata == nil && slot != immediateExpiredSlot {
   450  		// Unknown task
   451  		return
   452  	} else if prevSlotMetadata == nil && slot == immediateExpiredSlot {
   453  		runNow = true
   454  	} else if prevSlotMetadata != nil {
   455  		taskLevel = prevSlotMetadata.GetLevel()
   456  		runNow = prevSlotMetadata.GetExpirationMs() == sentinelSlotExpiredMs
   457  		runNow = runNow || (taskLevel == 0 && t.GetExpiredMs() <= prevSlotMetadata.GetExpirationMs()+xtw.GetTickMs())
   458  	}
   459  	runNow = runNow || t.GetExpiredMs() <= xtw.clock.NowInDefaultTZ().UnixMilli()
   460  
   461  	if runNow && !t.Cancelled() {
   462  		job := t.GetJob()
   463  		md := t.GetJobMetadata()
   464  		_ = xtw.gPool.Submit(func() {
   465  			jobStatsWrapper(xtw.stats, job)(xtw.ctx, md)
   466  		})
   467  	} else if t.Cancelled() {
   468  		if slot != nil {
   469  			slot.RemoveTask(t)
   470  		}
   471  		t.setSlot(nil)
   472  		t.setSlotMetadata(nil)
   473  		return
   474  	}
   475  
   476  	// Re-addTask loop job to timing wheel.
   477  	// Upgrade and downgrade (move) the t from one slot to another slot.
   478  	// Lock free.
   479  	switch t.GetJobType() {
   480  	case OnceJob:
   481  		event := xtw.twEventPool.Get()
   482  		if runNow {
   483  			event.CancelTaskJobID(t.GetJobID())
   484  		} else {
   485  			event.ReAddTask(t)
   486  		}
   487  		_ = xtw.twEventC.Send(event)
   488  	case RepeatedJob:
   489  		var sTask Task
   490  		if !runNow {
   491  			sTask = t
   492  		} else {
   493  			if t.GetRestLoopCount() == 0 {
   494  				event := xtw.twEventPool.Get()
   495  				event.CancelTaskJobID(t.GetJobID())
   496  				_ = xtw.twEventC.Send(event)
   497  				return
   498  			}
   499  			_sTask, ok := t.(ScheduledTask)
   500  			if !ok {
   501  				return
   502  			}
   503  			_sTask.UpdateNextScheduledMs()
   504  			sTask = _sTask
   505  			if sTask.GetExpiredMs() < 0 {
   506  				return
   507  			}
   508  		}
   509  		if sTask != nil {
   510  			event := xtw.twEventPool.Get()
   511  			event.ReAddTask(sTask)
   512  			_ = xtw.twEventC.Send(event)
   513  		}
   514  	}
   515  	return
   516  }
   517  
   518  func (xtw *xTimingWheels) cancelTask(jobID JobID) error {
   519  	if !xtw.isRunning.Load() {
   520  		return infra.WrapErrorStack(ErrTimingWheelStopped)
   521  	}
   522  
   523  	task, ok := xtw.tasksMap.Get(jobID)
   524  	if !ok {
   525  		return infra.WrapErrorStack(ErrTimingWheelTaskNotFound)
   526  	}
   527  
   528  	if task.GetSlot() != nil && !task.GetSlot().RemoveTask(task) {
   529  		return infra.WrapErrorStack(ErrTimingWheelTaskUnableToBeRemoved)
   530  	}
   531  
   532  	defer func() {
   533  		xtw.stats.IncreaseJobCancelledCount()
   534  		xtw.stats.RecordJobAliveCount(-1)
   535  	}()
   536  
   537  	task.Cancel()
   538  
   539  	_, err := xtw.tasksMap.Delete(jobID)
   540  	return infra.WrapErrorStack(err)
   541  }
   542  
   543  // NewXTimingWheels creates a new timing wheel.
   544  // The same as the kafka, Time.SYSTEM.hiResClockMs() is used.
   545  func NewXTimingWheels(ctx context.Context, opts ...TimingWheelsOption) TimingWheels {
   546  	if ctx == nil {
   547  		return nil
   548  	}
   549  
   550  	xtwOpt := &xTimingWheelsOption{}
   551  	for _, o := range opts {
   552  		if o != nil {
   553  			o(xtwOpt)
   554  		}
   555  	}
   556  	xtwOpt.Validate()
   557  
   558  	xtw := &xTimingWheels{
   559  		ctx:          ctx,
   560  		stopC:        make(chan struct{}),
   561  		twEventC:     infra.NewSafeClosableChannel[*timingWheelEvent](xtwOpt.getEventBufferSize()),
   562  		expiredSlotC: infra.NewSafeClosableChannel[TimingWheelSlot](xtwOpt.getExpiredSlotBufferSize()),
   563  		tasksMap:     kv.NewThreadSafeMap[JobID, Task](),
   564  		isRunning:    &atomic.Bool{},
   565  		clock:        xtwOpt.getClock(),
   566  		idGenerator:  xtwOpt.getIDGenerator(),
   567  		twEventPool:  newTimingWheelEventsPool(),
   568  		stats:        xtwOpt.getStats(),
   569  		name:         xtwOpt.getName(),
   570  	}
   571  	xtw.isRunning.Store(false)
   572  	if p, err := ants.NewPool(xtwOpt.getWorkerPoolSize(), ants.WithPreAlloc(true)); err != nil {
   573  		panic(err)
   574  	} else {
   575  		xtw.gPool = p
   576  	}
   577  	xtw.dq = queue.NewArrayDelayQueue[TimingWheelSlot](ctx, xtwOpt.defaultDelayQueueCapacity())
   578  	xtw.tw = newTimingWheel(
   579  		ctx,
   580  		xtwOpt.getBasicTickMilliseconds(),
   581  		xtwOpt.getSlotIncrementSize(),
   582  		xtwOpt.getClock().NowInDefaultTZ().UnixMilli(),
   583  		xtw.stats,
   584  		xtw.dq,
   585  		xtw.clock,
   586  	)
   587  	xtw.isRunning.Store(true)
   588  	xtw.schedule(ctx)
   589  	return xtw
   590  }