github.com/matrixorigin/matrixone@v0.7.0/pkg/common/stopper/stopper.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stopper
    16  
    17  import (
    18  	"context"
    19  	"strings"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/logutil"
    26  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    27  
    28  	"go.uber.org/zap"
    29  )
    30  
    31  var (
    32  	// ErrUnavailable stopper is not running
    33  	ErrUnavailable = moerr.NewInternalErrorNoCtx("runner is unavailable")
    34  )
    35  
    36  var (
    37  	defaultStoppedTimeout = time.Second * 30
    38  )
    39  
    40  type state int
    41  
    42  const (
    43  	running  = state(0)
    44  	stopping = state(1)
    45  	stopped  = state(2)
    46  )
    47  
    48  // Option stop option
    49  type Option func(*options)
    50  
    51  type options struct {
    52  	stopTimeout        time.Duration
    53  	logger             *zap.Logger
    54  	timeoutTaskHandler func(tasks []string, timeAfterStop time.Duration)
    55  }
    56  
    57  func (opts *options) adjust() {
    58  	if opts.stopTimeout == 0 {
    59  		opts.stopTimeout = defaultStoppedTimeout
    60  	}
    61  	opts.logger = logutil.Adjust(opts.logger)
    62  }
    63  
    64  // WithStopTimeout the stopper will print the names of tasks that are still running beyond this timeout.
    65  func WithStopTimeout(timeout time.Duration) Option {
    66  	return func(opts *options) {
    67  		opts.stopTimeout = timeout
    68  	}
    69  }
    70  
    71  // WithLogger set the logger
    72  func WithLogger(logger *zap.Logger) Option {
    73  	return func(opts *options) {
    74  		opts.logger = logger
    75  	}
    76  }
    77  
    78  // WithTimeoutTaskHandler set handler to handle timeout tasks
    79  func WithTimeoutTaskHandler(handler func(tasks []string, timeAfterStop time.Duration)) Option {
    80  	return func(opts *options) {
    81  		opts.timeoutTaskHandler = handler
    82  	}
    83  }
    84  
    85  // Stopper a stopper used to to manage all tasks that are executed in a separate goroutine,
    86  // and Stopper can manage these goroutines centrally to avoid leaks.
    87  // When Stopper's Stop method is called, if some tasks do not exit within the specified time,
    88  // the names of these tasks will be returned for analysis.
    89  type Stopper struct {
    90  	name    string
    91  	opts    *options
    92  	stopC   chan struct{}
    93  	cancels sync.Map // id -> cancelFunc
    94  	tasks   sync.Map // id -> name
    95  
    96  	atomic struct {
    97  		lastID    uint64
    98  		taskCount int64
    99  	}
   100  
   101  	mu struct {
   102  		sync.RWMutex
   103  		state state
   104  	}
   105  }
   106  
   107  // NewStopper create a stopper
   108  func NewStopper(name string, opts ...Option) *Stopper {
   109  	s := &Stopper{
   110  		name:  name,
   111  		opts:  &options{},
   112  		stopC: make(chan struct{}),
   113  	}
   114  	for _, opt := range opts {
   115  		opt(s.opts)
   116  	}
   117  	s.opts.adjust()
   118  
   119  	s.mu.state = running
   120  	return s
   121  }
   122  
   123  // RunTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running
   124  // See also `RunNamedTask`
   125  // Example:
   126  //
   127  //	err := s.RunTask(func(ctx context.Context) {
   128  //		select {
   129  //		case <-ctx.Done():
   130  //		// cancelled
   131  //		case <-time.After(time.Second):
   132  //			// do something
   133  //		}
   134  //	})
   135  //
   136  //	if err != nil {
   137  //		// hanle error
   138  //		return
   139  //	}
   140  func (s *Stopper) RunTask(task func(context.Context)) error {
   141  	return s.RunNamedTask("undefined", task)
   142  }
   143  
   144  // RunNamedTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running
   145  // Example:
   146  //
   147  //	err := s.RunNamedTask("named task", func(ctx context.Context) {
   148  //		select {
   149  //		case <-ctx.Done():
   150  //		// cancelled
   151  //		case <-time.After(time.Second):
   152  //			// do something
   153  //		}
   154  //	})
   155  //
   156  //	if err != nil {
   157  //		// hanle error
   158  //		return
   159  //	}
   160  func (s *Stopper) RunNamedTask(name string, task func(context.Context)) error {
   161  	// we use read lock here for avoid race
   162  	s.mu.RLock()
   163  	defer s.mu.RUnlock()
   164  
   165  	if s.mu.state != running {
   166  		return ErrUnavailable
   167  	}
   168  
   169  	id, ctx := s.allocate()
   170  	s.doRunCancelableTask(ctx, id, name, task)
   171  	return nil
   172  }
   173  
   174  // Stop stop all task, and wait to all tasks canceled. If some tasks do not exit within the specified time,
   175  // the names of these tasks will be print to the given logger.
   176  func (s *Stopper) Stop() {
   177  	s.mu.Lock()
   178  	state := s.mu.state
   179  	s.mu.state = stopping
   180  	s.mu.Unlock()
   181  
   182  	switch state {
   183  	case stopped:
   184  		return
   185  	case stopping:
   186  		<-s.stopC // wait concurrent stop completed
   187  		return
   188  	}
   189  
   190  	defer func() {
   191  		close(s.stopC)
   192  	}()
   193  
   194  	s.cancels.Range(func(key, value interface{}) bool {
   195  		cancel := value.(context.CancelFunc)
   196  		cancel()
   197  		return true
   198  	})
   199  
   200  	stopAt := time.Now()
   201  	timer := time.NewTimer(s.opts.stopTimeout)
   202  	defer timer.Stop()
   203  
   204  	for {
   205  		select {
   206  		case <-timer.C:
   207  			tasks := s.runningTasks()
   208  			continuous := time.Since(stopAt)
   209  			s.opts.logger.Warn("tasks still running in stopper",
   210  				zap.String("stopper", s.name),
   211  				zap.Duration("continuous", continuous),
   212  				zap.String("tasks", strings.Join(tasks, ",")))
   213  			if s.opts.timeoutTaskHandler != nil {
   214  				s.opts.timeoutTaskHandler(tasks, continuous)
   215  			}
   216  			timer.Reset(s.opts.stopTimeout)
   217  		default:
   218  			if s.GetTaskCount() == 0 {
   219  				return
   220  			}
   221  		}
   222  
   223  		// Such 5ms delay can be a problem if we need to repeatedly create different stoppers,
   224  		// e.g. one stopper for each incoming request.
   225  		time.Sleep(time.Millisecond * 5)
   226  	}
   227  }
   228  
   229  func (s *Stopper) runningTasks() []string {
   230  	if s.GetTaskCount() == 0 {
   231  		return nil
   232  	}
   233  
   234  	var tasks []string
   235  	s.tasks.Range(func(key, value interface{}) bool {
   236  		tasks = append(tasks, value.(string))
   237  		return true
   238  	})
   239  	return tasks
   240  }
   241  
   242  func (s *Stopper) setupTask(id uint64, name string) {
   243  	s.tasks.Store(id, name)
   244  	s.addTask(1)
   245  }
   246  
   247  func (s *Stopper) shutdownTask(id uint64) {
   248  	s.tasks.Delete(id)
   249  	s.addTask(-1)
   250  }
   251  
   252  func (s *Stopper) doRunCancelableTask(ctx context.Context, taskID uint64, name string, task func(context.Context)) {
   253  	s.setupTask(taskID, name)
   254  	go func() {
   255  		defer func() {
   256  			s.shutdownTask(taskID)
   257  		}()
   258  
   259  		task(ctx)
   260  	}()
   261  }
   262  
   263  func (s *Stopper) allocate() (uint64, context.Context) {
   264  	ctx, cancel := context.WithCancel(context.Background())
   265  	ctx = trace.Generate(ctx) // fill span{trace_id} in ctx
   266  	id := s.nextTaskID()
   267  	s.cancels.Store(id, cancel)
   268  	return id, ctx
   269  }
   270  
   271  func (s *Stopper) nextTaskID() uint64 {
   272  	return atomic.AddUint64(&s.atomic.lastID, 1)
   273  }
   274  
   275  func (s *Stopper) addTask(v int64) {
   276  	atomic.AddInt64(&s.atomic.taskCount, v)
   277  }
   278  
   279  // GetTaskCount returns number of the running task
   280  func (s *Stopper) GetTaskCount() int64 {
   281  	return atomic.LoadInt64(&s.atomic.taskCount)
   282  }