go.temporal.io/server@v1.23.0/common/tasks/sequential_scheduler.go

// The MIT License
//
// Copyright (c) 2023 Temporal Technologies Inc.  All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package tasks

import (
	"sync"
	"sync/atomic"
	"time"

	"go.temporal.io/server/common"
	"go.temporal.io/server/common/backoff"
	"go.temporal.io/server/common/collection"
	"go.temporal.io/server/common/dynamicconfig"
	"go.temporal.io/server/common/log"
	"go.temporal.io/server/common/log/tag"
)

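// Compile-time assertion that *SequentialScheduler satisfies the Scheduler interface.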
var _ Scheduler[Task] = (*SequentialScheduler[Task])(nil)

type (
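	// SequentialSchedulerOptions configures a SequentialScheduler: QueueSize bounds the
	// channel of pending task queues, and WorkerCount is a dynamic-config property that
	// controls how many worker goroutines poll that channel.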
	SequentialSchedulerOptions struct {
		QueueSize   int
		WorkerCount dynamicconfig.IntPropertyFn
	}

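	// SequentialScheduler groups submitted tasks into SequentialTaskQueues keyed by
	// queue ID and processes each queue sequentially, so tasks sharing a queue never
	// run concurrently, while different queues may be processed in parallel by the
	// worker pool.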
	SequentialScheduler[T Task] struct {
		status           int32
		shutdownChan     chan struct{}
		shutdownWG       sync.WaitGroup
		workerShutdownCh []chan struct{}

		options      *SequentialSchedulerOptions
		queues       collection.ConcurrentTxMap
		queueFactory SequentialTaskQueueFactory[T]
		queueChan    chan SequentialTaskQueue[T]

		logger log.Logger
	}
)

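// NewSequentialScheduler returns a SequentialScheduler in the initialized state.
// taskQueueHashFn shards the internal queue map, and taskQueueFactory builds the
// per-key SequentialTaskQueue for a submitted task.
//
// A minimal usage sketch, assuming application-side myTask, myHashFn, newMyTaskQueue,
// workerCount, and logger values that are not part of this package:
//
//	scheduler := NewSequentialScheduler[*myTask](
//		&SequentialSchedulerOptions{
//			QueueSize:   100,
//			WorkerCount: workerCount, // a dynamicconfig.IntPropertyFn
//		},
//		myHashFn,       // collection.HashFunc over queue IDs
//		newMyTaskQueue, // SequentialTaskQueueFactory[*myTask]
//		logger,
//	)
//	scheduler.Start()
//	defer scheduler.Stop()
//	scheduler.Submit(&myTask{})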
func NewSequentialScheduler[T Task](
	options *SequentialSchedulerOptions,
	taskQueueHashFn collection.HashFunc,
	taskQueueFactory SequentialTaskQueueFactory[T],
	logger log.Logger,
) *SequentialScheduler[T] {
	return &SequentialScheduler[T]{
		status:       common.DaemonStatusInitialized,
		shutdownChan: make(chan struct{}),
		options:      options,

		logger: logger,

		queueFactory: taskQueueFactory,
		queueChan:    make(chan SequentialTaskQueue[T], options.QueueSize),
		queues:       collection.NewShardedConcurrentTxMap(1024, taskQueueHashFn),
	}
}

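// Start transitions the scheduler from initialized to started, launches the initial
// worker pool, and starts the monitor goroutine that resizes the pool when the dynamic
// WorkerCount value changes. Calling Start more than once is a no-op.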
func (s *SequentialScheduler[T]) Start() {
	if !atomic.CompareAndSwapInt32(
		&s.status,
		common.DaemonStatusInitialized,
		common.DaemonStatusStarted,
	) {
		return
	}

	s.startWorkers(s.options.WorkerCount())

	s.shutdownWG.Add(1)
	go s.workerMonitor()

	s.logger.Info("sequential scheduler started")
}

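// Stop transitions the scheduler to stopped, closes shutdownChan to signal all workers,
// aborts any tasks still buffered in the queue channel, and waits (up to one minute, in a
// background goroutine) for the workers to exit. Calling Stop more than once is a no-op.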
func (s *SequentialScheduler[T]) Stop() {
	if !atomic.CompareAndSwapInt32(
		&s.status,
		common.DaemonStatusStarted,
		common.DaemonStatusStopped,
	) {
		return
	}

	close(s.shutdownChan)
	// drainTasks must be called only after shutdownChan has been closed
	s.drainTasks()

	go func() {
		if success := common.AwaitWaitGroup(&s.shutdownWG, time.Minute); !success {
			s.logger.Warn("sequential scheduler timed out waiting for workers")
		}
	}()
	s.logger.Info("sequential scheduler stopped")
}

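// Submit adds the task to its sequential queue. If the queue does not exist yet it is
// created and dispatched to the worker pool, blocking while the queue channel is full.
// If the scheduler has already been stopped, pending tasks are aborted via drainTasks.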
func (s *SequentialScheduler[T]) Submit(task T) {
	queue := s.queueFactory(task)
	queue.Add(task)

	_, fnEvaluated, err := s.queues.PutOrDo(
		queue.ID(),
		queue,
		func(key interface{}, value interface{}) error {
			value.(SequentialTaskQueue[T]).Add(task)
			return nil
		},
	)
	if err != nil {
		panic("Error is not expected as the evaluation function returns nil")
	}

	// if the function was evaluated, the task was added to an existing queue,
	// which has already been dispatched
	if fnEvaluated {
		if s.isStopped() {
			s.drainTasks()
		}
		return
	}

	// the queue is new and needs to be dispatched to the worker pool
	select {
	case <-s.shutdownChan:
		task.Abort()
	case s.queueChan <- queue:
		if s.isStopped() {
			s.drainTasks()
		}
	}
}

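// TrySubmit is the non-blocking variant of Submit: it returns true if the task was added
// to an existing queue or its new queue was handed to the worker pool, and false if the
// queue channel was full.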
func (s *SequentialScheduler[T]) TrySubmit(task T) bool {
	queue := s.queueFactory(task)
	queue.Add(task)

	_, fnEvaluated, err := s.queues.PutOrDo(
		queue.ID(),
		queue,
		func(key interface{}, value interface{}) error {
			value.(SequentialTaskQueue[T]).Add(task)
			return nil
		},
	)
	if err != nil {
		panic("Error is not expected as the evaluation function returns nil")
	}
	if fnEvaluated {
		if s.isStopped() {
			s.drainTasks()
		}
		return true
	}

	select {
	case s.queueChan <- queue:
		if s.isStopped() {
			s.drainTasks()
		}
		return true
	default:
		return false
	}
}

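// workerMonitor periodically re-reads the WorkerCount dynamic config value (with jitter)
// and grows or shrinks the worker pool to match. It exits, stopping all workers, when the
// scheduler shuts down.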
func (s *SequentialScheduler[T]) workerMonitor() {
	defer s.shutdownWG.Done()

	for {
		timer := time.NewTimer(backoff.Jitter(defaultMonitorTickerDuration, defaultMonitorTickerJitter))
		select {
		case <-s.shutdownChan:
			timer.Stop()
			s.stopWorkers(len(s.workerShutdownCh))
			return
		case <-timer.C:
			targetWorkerNum := s.options.WorkerCount()
			currentWorkerNum := len(s.workerShutdownCh)

			if targetWorkerNum == currentWorkerNum {
				continue
			}

			if targetWorkerNum > currentWorkerNum {
				s.startWorkers(targetWorkerNum - currentWorkerNum)
			} else {
				s.stopWorkers(currentWorkerNum - targetWorkerNum)
			}
			s.logger.Info("Update worker pool size", tag.Key("worker-pool-size"), tag.Value(targetWorkerNum))
		}
	}
}

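// startWorkers launches count additional worker goroutines, each with its own shutdown
// channel so the monitor can stop them individually.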
func (s *SequentialScheduler[T]) startWorkers(
	count int,
) {
	for i := 0; i < count; i++ {
		shutdownCh := make(chan struct{})
		s.workerShutdownCh = append(s.workerShutdownCh, shutdownCh)

		s.shutdownWG.Add(1)
		go s.pollTaskQueue(shutdownCh)
	}
}

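// stopWorkers signals the first count workers to stop by closing their shutdown channels
// and removes those channels from the tracked slice.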
func (s *SequentialScheduler[T]) stopWorkers(
	count int,
) {
	shutdownChToClose := s.workerShutdownCh[:count]
	s.workerShutdownCh = s.workerShutdownCh[count:]

	for _, shutdownCh := range shutdownChToClose {
		close(shutdownCh)
	}
}

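// pollTaskQueue is the worker loop: it pulls dispatched queues off the queue channel and
// processes them until either the scheduler shuts down or this worker is told to stop.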
func (s *SequentialScheduler[T]) pollTaskQueue(workerShutdownCh <-chan struct{}) {
	defer s.shutdownWG.Done()

	for {
		select {
		case <-s.shutdownChan:
			s.drainTasks()
			return
		case <-workerShutdownCh:
			return
		case queue := <-s.queueChan:
			s.processTaskQueue(queue, workerShutdownCh)
		}
	}
}

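// processTaskQueue executes the tasks of a single queue one at a time until the queue is
// empty and has been removed from the queue map. If the worker is told to stop, the queue
// is handed back to the queue channel; on scheduler shutdown, remaining tasks are drained.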
func (s *SequentialScheduler[T]) processTaskQueue(
	queue SequentialTaskQueue[T],
	workerShutdownCh <-chan struct{},
) {
	for {
		select {
		case <-s.shutdownChan:
			s.drainTasks()
			return
		case <-workerShutdownCh:
			// put the queue back into the queue channel so another worker can pick it up
			s.queueChan <- queue
			return
		default:
			// NOTE: implicit assumptions
			// 1. a queue is owned by a single goroutine at a time
			// 2. that goroutine removes a task from its queue, executes it, and then acks / nacks / reschedules it
			// 3. a queue is deleted from the queue map once it is empty
			//
			// for batched tasks, depending on the resulting task state:
			// ack: behavior is the same as for a normal task
			// nack: the batched task is broken back into its original tasks, which are synchronously added to the queue (so the queue is not empty)
			// reschedule: behavior is the same as for a normal task
			if !queue.IsEmpty() {
				s.executeTask(queue)
			} else {
				deleted := s.queues.RemoveIf(queue.ID(), func(key interface{}, value interface{}) bool {
					return value.(SequentialTaskQueue[T]).IsEmpty()
				})
				if deleted {
					return
				}
				// if deletion failed, the queue was concurrently offered a new task;
				// continue execution
			}
		}
	}
}

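// executeTask removes one task from the queue and runs it under the task's retry policy,
// then acks on success, aborts if the scheduler has stopped, or nacks on a final error.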
// TODO: change this function to process all available tasks in the queue.
func (s *SequentialScheduler[T]) executeTask(queue SequentialTaskQueue[T]) {
	task := queue.Remove()
	operation := func() error {
		if err := task.Execute(); err != nil {
			return task.HandleErr(err)
		}
		return nil
	}
	isRetryable := func(err error) bool {
		return !s.isStopped() && task.IsRetryableError(err)
	}
	if err := backoff.ThrottleRetry(operation, task.RetryPolicy(), isRetryable); err != nil {
		if s.isStopped() {
			task.Abort()
			return
		}

		task.Nack(err)
		return
	}

	task.Ack()
}

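// drainTasks aborts every task buffered in the queue channel: it keeps receiving queues,
// aborting their tasks until each queue can be removed from the queue map while empty,
// and stops once the channel itself is empty.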
func (s *SequentialScheduler[T]) drainTasks() {
LoopDrainQueues:
	for {
		select {
		case queue := <-s.queueChan:
		LoopDrainSingleQueue:
			for {
				for !queue.IsEmpty() {
					queue.Remove().Abort()
				}
				deleted := s.queues.RemoveIf(queue.ID(), func(key interface{}, value interface{}) bool {
					return value.(SequentialTaskQueue[T]).IsEmpty()
				})
				if deleted {
					break LoopDrainSingleQueue
				}
			}
		default:
			break LoopDrainQueues
		}
	}
}

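// isStopped reports whether Stop has been called.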
func (s *SequentialScheduler[T]) isStopped() bool {
	return atomic.LoadInt32(&s.status) == common.DaemonStatusStopped
}