github.com/matrixorigin/matrixone@v1.2.0/pkg/common/stopper/stopper.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stopper
    16  
    17  import (
    18  	"context"
    19  	"strings"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/logutil"
    26  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    27  
    28  	"go.uber.org/zap"
    29  )
    30  
    31  var (
    32  	// ErrUnavailable stopper is not running
    33  	ErrUnavailable = moerr.NewInternalErrorNoCtx("runner is unavailable")
    34  )
    35  
    36  var (
    37  	defaultStoppedTimeout = time.Second * 30
    38  )
    39  
    40  type state int
    41  
    42  const (
    43  	running  = state(0)
    44  	stopping = state(1)
    45  	stopped  = state(2)
    46  )
    47  
    48  // Option stop option
    49  type Option func(*options)
    50  
    51  type options struct {
    52  	stopTimeout        time.Duration
    53  	logger             *zap.Logger
    54  	timeoutTaskHandler func(tasks []string, timeAfterStop time.Duration)
    55  }
    56  
    57  func (opts *options) adjust() {
    58  	if opts.stopTimeout == 0 {
    59  		opts.stopTimeout = defaultStoppedTimeout
    60  	}
    61  	opts.logger = logutil.Adjust(opts.logger)
    62  }
    63  
    64  // WithStopTimeout the stopper will print the names of tasks that are still running beyond this timeout.
    65  func WithStopTimeout(timeout time.Duration) Option {
    66  	return func(opts *options) {
    67  		opts.stopTimeout = timeout
    68  	}
    69  }
    70  
    71  // WithLogger set the logger
    72  func WithLogger(logger *zap.Logger) Option {
    73  	return func(opts *options) {
    74  		opts.logger = logger
    75  	}
    76  }
    77  
    78  // WithTimeoutTaskHandler set handler to handle timeout tasks
    79  func WithTimeoutTaskHandler(handler func(tasks []string, timeAfterStop time.Duration)) Option {
    80  	return func(opts *options) {
    81  		opts.timeoutTaskHandler = handler
    82  	}
    83  }
    84  
    85  // Stopper a stopper used to manage all tasks that are executed in a separate goroutine,
    86  // and Stopper can manage these goroutines centrally to avoid leaks.
    87  // When Stopper's Stop method is called, if some tasks do not exit within the specified time,
    88  // the names of these tasks will be returned for analysis.
    89  type Stopper struct {
    90  	name  string
    91  	opts  *options
    92  	stopC chan struct{}
    93  
    94  	ctx    context.Context
    95  	cancel context.CancelFunc
    96  
    97  	lastId atomic.Uint64
    98  
    99  	tasks struct {
   100  		sync.RWMutex
   101  		m map[uint64]string
   102  	}
   103  
   104  	mu struct {
   105  		sync.RWMutex
   106  		state state
   107  	}
   108  }
   109  
   110  // NewStopper create a stopper
   111  func NewStopper(name string, opts ...Option) *Stopper {
   112  	s := &Stopper{
   113  		name:  name,
   114  		opts:  &options{},
   115  		stopC: make(chan struct{}),
   116  	}
   117  	s.ctx, s.cancel = context.WithCancel(context.Background())
   118  	s.tasks.m = make(map[uint64]string)
   119  	for _, opt := range opts {
   120  		opt(s.opts)
   121  	}
   122  	s.opts.adjust()
   123  
   124  	s.mu.state = running
   125  	return s
   126  }
   127  
   128  // RunTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running
   129  // See also `RunNamedTask`
   130  // Example:
   131  //
   132  //	err := s.RunTask(func(ctx context.Context) {
   133  //		select {
   134  //		case <-ctx.Done():
   135  //		// cancelled
   136  //		case <-time.After(time.Second):
   137  //			// do something
   138  //		}
   139  //	})
   140  //
   141  //	if err != nil {
   142  //		// handle error
   143  //		return
   144  //	}
   145  func (s *Stopper) RunTask(task func(context.Context)) error {
   146  	return s.RunNamedTask("undefined", task)
   147  }
   148  
   149  // RunNamedTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running
   150  // Example:
   151  //
   152  //	err := s.RunNamedTask("named task", func(ctx context.Context) {
   153  //		select {
   154  //		case <-ctx.Done():
   155  //		// cancelled
   156  //		case <-time.After(time.Second):
   157  //			// do something
   158  //		}
   159  //	})
   160  //
   161  //	if err != nil {
   162  //		// handle error
   163  //		return
   164  //	}
   165  func (s *Stopper) RunNamedTask(name string, task func(context.Context)) error {
   166  	// we use read lock here for avoid race
   167  	s.mu.RLock()
   168  	defer s.mu.RUnlock()
   169  
   170  	if s.mu.state != running {
   171  		return ErrUnavailable
   172  	}
   173  
   174  	id, ctx := s.allocate()
   175  	s.doRunCancelableTask(ctx, id, name, task)
   176  	return nil
   177  }
   178  
   179  func (s *Stopper) RunNamedRetryTask(name string, accountId int32, retryLimit uint32, task func(context.Context, int32) error) error {
   180  	// we use read lock here for avoid race
   181  	s.mu.RLock()
   182  	defer s.mu.RUnlock()
   183  
   184  	if s.mu.state != running {
   185  		return ErrUnavailable
   186  	}
   187  
   188  	id, ctx := s.allocate()
   189  	s.doRunCancelableRetryTask(ctx, id, name, accountId, retryLimit, task)
   190  	return nil
   191  }
   192  
   193  // Stop stops all task, and wait to all tasks canceled. If some tasks do not exit within the specified time,
   194  // the names of these tasks will be print to the given logger.
   195  func (s *Stopper) Stop() {
   196  	s.mu.Lock()
   197  	state := s.mu.state
   198  	s.mu.state = stopping
   199  	s.mu.Unlock()
   200  
   201  	switch state {
   202  	case stopped:
   203  		return
   204  	case stopping:
   205  		<-s.stopC // wait concurrent stop completed
   206  		return
   207  	default:
   208  	}
   209  
   210  	defer func() {
   211  		close(s.stopC)
   212  	}()
   213  
   214  	s.cancel()
   215  
   216  	stopAt := time.Now()
   217  	ticker := time.NewTicker(s.opts.stopTimeout)
   218  	defer ticker.Stop()
   219  
   220  	for {
   221  		select {
   222  		case <-ticker.C:
   223  			tasks := s.runningTasks()
   224  			continuous := time.Since(stopAt)
   225  			s.opts.logger.Warn("tasks still running in stopper",
   226  				zap.String("stopper", s.name),
   227  				zap.Duration("continuous", continuous),
   228  				zap.String("tasks", strings.Join(tasks, ",")))
   229  			if s.opts.timeoutTaskHandler != nil {
   230  				s.opts.timeoutTaskHandler(tasks, continuous)
   231  			}
   232  		default:
   233  			if s.getTaskCount() == 0 {
   234  				return
   235  			}
   236  		}
   237  
   238  		// Such 5ms delay can be a problem if we need to repeatedly create different stoppers,
   239  		// e.g. one stopper for each incoming request.
   240  		time.Sleep(time.Millisecond * 5)
   241  	}
   242  }
   243  
   244  func (s *Stopper) runningTasks() []string {
   245  	s.tasks.RLock()
   246  	defer s.tasks.RUnlock()
   247  	if s.getTaskCount() == 0 {
   248  		return nil
   249  	}
   250  
   251  	tasks := make([]string, 0, len(s.tasks.m))
   252  	for _, name := range s.tasks.m {
   253  		tasks = append(tasks, name)
   254  	}
   255  	return tasks
   256  }
   257  
   258  func (s *Stopper) setupTask(id uint64, name string) {
   259  	s.tasks.Lock()
   260  	defer s.tasks.Unlock()
   261  	s.tasks.m[id] = name
   262  }
   263  
   264  func (s *Stopper) shutdownTask(id uint64) {
   265  	s.tasks.Lock()
   266  	defer s.tasks.Unlock()
   267  	delete(s.tasks.m, id)
   268  }
   269  
   270  func (s *Stopper) doRunCancelableTask(ctx context.Context, taskID uint64, name string, task func(context.Context)) {
   271  	s.setupTask(taskID, name)
   272  	go func() {
   273  		defer func() {
   274  			s.shutdownTask(taskID)
   275  		}()
   276  
   277  		task(ctx)
   278  	}()
   279  }
   280  
   281  // doRunCancelableRetryTask Canceleable and able to retry execute asynchronous tasks
   282  func (s *Stopper) doRunCancelableRetryTask(ctx context.Context,
   283  	taskID uint64,
   284  	name string,
   285  	accountId int32,
   286  	retryLimit uint32,
   287  	task func(context.Context, int32) error) {
   288  	s.setupTask(taskID, name)
   289  	go func() {
   290  		defer func() {
   291  			s.shutdownTask(taskID)
   292  		}()
   293  
   294  		wait := time.Second
   295  		maxWait := time.Second * 10
   296  		for i := 0; i < int(retryLimit); i++ {
   297  			if err := task(ctx, accountId); err == nil {
   298  				return
   299  			}
   300  			time.Sleep(wait)
   301  			wait *= 2
   302  			if wait > maxWait {
   303  				wait = maxWait
   304  			}
   305  			select {
   306  			case <-ctx.Done():
   307  				return
   308  			default:
   309  			}
   310  		}
   311  	}()
   312  }
   313  
   314  func (s *Stopper) allocate() (uint64, context.Context) {
   315  	// fill span{trace_id} in ctx
   316  	return s.lastId.Add(1), trace.Generate(s.ctx)
   317  }
   318  
   319  // getTaskCount returns number of the running task
   320  func (s *Stopper) getTaskCount() int {
   321  	s.tasks.RLock()
   322  	defer s.tasks.RUnlock()
   323  	return len(s.tasks.m)
   324  }