github.com/matrixorigin/matrixone@v1.2.0/pkg/common/stopper/stopper.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stopper 16 17 import ( 18 "context" 19 "strings" 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/logutil" 26 "github.com/matrixorigin/matrixone/pkg/util/trace" 27 28 "go.uber.org/zap" 29 ) 30 31 var ( 32 // ErrUnavailable stopper is not running 33 ErrUnavailable = moerr.NewInternalErrorNoCtx("runner is unavailable") 34 ) 35 36 var ( 37 defaultStoppedTimeout = time.Second * 30 38 ) 39 40 type state int 41 42 const ( 43 running = state(0) 44 stopping = state(1) 45 stopped = state(2) 46 ) 47 48 // Option stop option 49 type Option func(*options) 50 51 type options struct { 52 stopTimeout time.Duration 53 logger *zap.Logger 54 timeoutTaskHandler func(tasks []string, timeAfterStop time.Duration) 55 } 56 57 func (opts *options) adjust() { 58 if opts.stopTimeout == 0 { 59 opts.stopTimeout = defaultStoppedTimeout 60 } 61 opts.logger = logutil.Adjust(opts.logger) 62 } 63 64 // WithStopTimeout the stopper will print the names of tasks that are still running beyond this timeout. 65 func WithStopTimeout(timeout time.Duration) Option { 66 return func(opts *options) { 67 opts.stopTimeout = timeout 68 } 69 } 70 71 // WithLogger set the logger 72 func WithLogger(logger *zap.Logger) Option { 73 return func(opts *options) { 74 opts.logger = logger 75 } 76 } 77 78 // WithTimeoutTaskHandler set handler to handle timeout tasks 79 func WithTimeoutTaskHandler(handler func(tasks []string, timeAfterStop time.Duration)) Option { 80 return func(opts *options) { 81 opts.timeoutTaskHandler = handler 82 } 83 } 84 85 // Stopper a stopper used to manage all tasks that are executed in a separate goroutine, 86 // and Stopper can manage these goroutines centrally to avoid leaks. 87 // When Stopper's Stop method is called, if some tasks do not exit within the specified time, 88 // the names of these tasks will be returned for analysis. 89 type Stopper struct { 90 name string 91 opts *options 92 stopC chan struct{} 93 94 ctx context.Context 95 cancel context.CancelFunc 96 97 lastId atomic.Uint64 98 99 tasks struct { 100 sync.RWMutex 101 m map[uint64]string 102 } 103 104 mu struct { 105 sync.RWMutex 106 state state 107 } 108 } 109 110 // NewStopper create a stopper 111 func NewStopper(name string, opts ...Option) *Stopper { 112 s := &Stopper{ 113 name: name, 114 opts: &options{}, 115 stopC: make(chan struct{}), 116 } 117 s.ctx, s.cancel = context.WithCancel(context.Background()) 118 s.tasks.m = make(map[uint64]string) 119 for _, opt := range opts { 120 opt(s.opts) 121 } 122 s.opts.adjust() 123 124 s.mu.state = running 125 return s 126 } 127 128 // RunTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running 129 // See also `RunNamedTask` 130 // Example: 131 // 132 // err := s.RunTask(func(ctx context.Context) { 133 // select { 134 // case <-ctx.Done(): 135 // // cancelled 136 // case <-time.After(time.Second): 137 // // do something 138 // } 139 // }) 140 // 141 // if err != nil { 142 // // handle error 143 // return 144 // } 145 func (s *Stopper) RunTask(task func(context.Context)) error { 146 return s.RunNamedTask("undefined", task) 147 } 148 149 // RunNamedTask run a task that can be cancelled. ErrUnavailable returned if stopped is not running 150 // Example: 151 // 152 // err := s.RunNamedTask("named task", func(ctx context.Context) { 153 // select { 154 // case <-ctx.Done(): 155 // // cancelled 156 // case <-time.After(time.Second): 157 // // do something 158 // } 159 // }) 160 // 161 // if err != nil { 162 // // handle error 163 // return 164 // } 165 func (s *Stopper) RunNamedTask(name string, task func(context.Context)) error { 166 // we use read lock here for avoid race 167 s.mu.RLock() 168 defer s.mu.RUnlock() 169 170 if s.mu.state != running { 171 return ErrUnavailable 172 } 173 174 id, ctx := s.allocate() 175 s.doRunCancelableTask(ctx, id, name, task) 176 return nil 177 } 178 179 func (s *Stopper) RunNamedRetryTask(name string, accountId int32, retryLimit uint32, task func(context.Context, int32) error) error { 180 // we use read lock here for avoid race 181 s.mu.RLock() 182 defer s.mu.RUnlock() 183 184 if s.mu.state != running { 185 return ErrUnavailable 186 } 187 188 id, ctx := s.allocate() 189 s.doRunCancelableRetryTask(ctx, id, name, accountId, retryLimit, task) 190 return nil 191 } 192 193 // Stop stops all task, and wait to all tasks canceled. If some tasks do not exit within the specified time, 194 // the names of these tasks will be print to the given logger. 195 func (s *Stopper) Stop() { 196 s.mu.Lock() 197 state := s.mu.state 198 s.mu.state = stopping 199 s.mu.Unlock() 200 201 switch state { 202 case stopped: 203 return 204 case stopping: 205 <-s.stopC // wait concurrent stop completed 206 return 207 default: 208 } 209 210 defer func() { 211 close(s.stopC) 212 }() 213 214 s.cancel() 215 216 stopAt := time.Now() 217 ticker := time.NewTicker(s.opts.stopTimeout) 218 defer ticker.Stop() 219 220 for { 221 select { 222 case <-ticker.C: 223 tasks := s.runningTasks() 224 continuous := time.Since(stopAt) 225 s.opts.logger.Warn("tasks still running in stopper", 226 zap.String("stopper", s.name), 227 zap.Duration("continuous", continuous), 228 zap.String("tasks", strings.Join(tasks, ","))) 229 if s.opts.timeoutTaskHandler != nil { 230 s.opts.timeoutTaskHandler(tasks, continuous) 231 } 232 default: 233 if s.getTaskCount() == 0 { 234 return 235 } 236 } 237 238 // Such 5ms delay can be a problem if we need to repeatedly create different stoppers, 239 // e.g. one stopper for each incoming request. 240 time.Sleep(time.Millisecond * 5) 241 } 242 } 243 244 func (s *Stopper) runningTasks() []string { 245 s.tasks.RLock() 246 defer s.tasks.RUnlock() 247 if s.getTaskCount() == 0 { 248 return nil 249 } 250 251 tasks := make([]string, 0, len(s.tasks.m)) 252 for _, name := range s.tasks.m { 253 tasks = append(tasks, name) 254 } 255 return tasks 256 } 257 258 func (s *Stopper) setupTask(id uint64, name string) { 259 s.tasks.Lock() 260 defer s.tasks.Unlock() 261 s.tasks.m[id] = name 262 } 263 264 func (s *Stopper) shutdownTask(id uint64) { 265 s.tasks.Lock() 266 defer s.tasks.Unlock() 267 delete(s.tasks.m, id) 268 } 269 270 func (s *Stopper) doRunCancelableTask(ctx context.Context, taskID uint64, name string, task func(context.Context)) { 271 s.setupTask(taskID, name) 272 go func() { 273 defer func() { 274 s.shutdownTask(taskID) 275 }() 276 277 task(ctx) 278 }() 279 } 280 281 // doRunCancelableRetryTask Canceleable and able to retry execute asynchronous tasks 282 func (s *Stopper) doRunCancelableRetryTask(ctx context.Context, 283 taskID uint64, 284 name string, 285 accountId int32, 286 retryLimit uint32, 287 task func(context.Context, int32) error) { 288 s.setupTask(taskID, name) 289 go func() { 290 defer func() { 291 s.shutdownTask(taskID) 292 }() 293 294 wait := time.Second 295 maxWait := time.Second * 10 296 for i := 0; i < int(retryLimit); i++ { 297 if err := task(ctx, accountId); err == nil { 298 return 299 } 300 time.Sleep(wait) 301 wait *= 2 302 if wait > maxWait { 303 wait = maxWait 304 } 305 select { 306 case <-ctx.Done(): 307 return 308 default: 309 } 310 } 311 }() 312 } 313 314 func (s *Stopper) allocate() (uint64, context.Context) { 315 // fill span{trace_id} in ctx 316 return s.lastId.Add(1), trace.Generate(s.ctx) 317 } 318 319 // getTaskCount returns number of the running task 320 func (s *Stopper) getTaskCount() int { 321 s.tasks.RLock() 322 defer s.tasks.RUnlock() 323 return len(s.tasks.m) 324 }