github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/workerpool/pool_impl.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package workerpool 15 16 import ( 17 "context" 18 "sync" 19 "sync/atomic" 20 "time" 21 22 "github.com/pingcap/log" 23 24 "github.com/pingcap/errors" 25 "github.com/pingcap/failpoint" 26 cerrors "github.com/pingcap/ticdc/pkg/errors" 27 "github.com/pingcap/ticdc/pkg/notify" 28 "go.uber.org/zap" 29 "golang.org/x/sync/errgroup" 30 ) 31 32 const ( 33 workerPoolDefaultClockSourceInterval = time.Millisecond * 100 34 ) 35 36 type defaultPoolImpl struct { 37 // assume the hasher to be the trivial hasher for now 38 hasher Hasher 39 // do not resize this slice after creating the pool 40 workers []*worker 41 // used to generate handler IDs, must be accessed atomically 42 nextHandlerID int64 43 } 44 45 // NewDefaultWorkerPool creates a new WorkerPool that uses the default implementation 46 func NewDefaultWorkerPool(numWorkers int) WorkerPool { 47 return newDefaultPoolImpl(&defaultHasher{}, numWorkers) 48 } 49 50 func newDefaultPoolImpl(hasher Hasher, numWorkers int) *defaultPoolImpl { 51 workers := make([]*worker, numWorkers) 52 for i := 0; i < numWorkers; i++ { 53 workers[i] = newWorker() 54 } 55 return &defaultPoolImpl{ 56 hasher: hasher, 57 workers: workers, 58 } 59 } 60 61 func (p *defaultPoolImpl) Run(ctx context.Context) error { 62 errg, ctx := errgroup.WithContext(ctx) 63 64 for _, worker := range p.workers { 65 workerFinal := worker 66 errg.Go(func() error { 67 err := workerFinal.run(ctx) 68 if err != nil { 69 return errors.Trace(err) 70 } 71 return nil 72 }) 73 } 74 75 return errg.Wait() 76 } 77 78 func (p *defaultPoolImpl) RegisterEvent(f func(ctx context.Context, event interface{}) error) EventHandle { 79 handler := &defaultEventHandle{ 80 f: f, 81 errCh: make(chan error, 1), 82 id: atomic.AddInt64(&p.nextHandlerID, 1) - 1, 83 } 84 85 workerID := p.hasher.Hash(handler) % int64(len(p.workers)) 86 p.workers[workerID].addHandle(handler) 87 handler.worker = p.workers[workerID] 88 89 return handler 90 } 91 92 type defaultEventHandle struct { 93 // the function to be run each time the event is triggered 94 f func(ctx context.Context, event interface{}) error 95 // whether this handle has been cancelled, must be accessed atomically 96 isCancelled int32 97 // channel for the error returned by f 98 errCh chan error 99 // the worker that the handle is associated with 100 worker *worker 101 // identifier for this handle. No significant usage for now. 102 // Might be used to support consistent hashing in the future, 103 // so that the pool can be resized efficiently. 104 id int64 105 106 // whether there is a valid timer handler, must be accessed atomically 107 hasTimer int32 108 // the time when timer was triggered the last time 109 lastTimer time.Time 110 // minimum interval between two timer calls 111 timerInterval time.Duration 112 // the handler for the timer 113 timerHandler func(ctx context.Context) error 114 115 // whether this is a valid errorHandler, must be accessed atomically 116 hasErrorHandler int32 117 // the error handler, called when the handle meets an error (which is returned by f) 118 errorHandler func(err error) 119 } 120 121 func (h *defaultEventHandle) AddEvent(ctx context.Context, event interface{}) error { 122 if atomic.LoadInt32(&h.isCancelled) == 1 { 123 return cerrors.ErrWorkerPoolHandleCancelled.GenWithStackByArgs() 124 } 125 126 failpoint.Inject("addEventDelayPoint", func() {}) 127 128 task := task{ 129 handle: h, 130 f: func(ctx1 context.Context) error { 131 return h.f(ctx, event) 132 }, 133 } 134 135 select { 136 case <-ctx.Done(): 137 return errors.Trace(ctx.Err()) 138 case h.worker.taskCh <- task: 139 } 140 return nil 141 } 142 143 func (h *defaultEventHandle) SetTimer(ctx context.Context, interval time.Duration, f func(ctx context.Context) error) EventHandle { 144 // mark the timer handler function as invalid 145 atomic.StoreInt32(&h.hasTimer, 0) 146 // wait for `hasTimer` to take effect, otherwise we might have a data race, if there was a previous handler. 147 h.worker.synchronize() 148 149 h.timerInterval = interval 150 h.timerHandler = func(ctx1 context.Context) error { 151 return f(ctx) 152 } 153 // mark the timer handler function as valid 154 atomic.StoreInt32(&h.hasTimer, 1) 155 156 return h 157 } 158 159 func (h *defaultEventHandle) Unregister() { 160 if !atomic.CompareAndSwapInt32(&h.isCancelled, 0, 1) { 161 // already cancelled 162 return 163 } 164 165 failpoint.Inject("unregisterDelayPoint", func() {}) 166 167 // call synchronize so that all function executions related to this handle will be 168 // linearized BEFORE Unregister. 169 h.worker.synchronize() 170 171 h.doCancel(cerrors.ErrWorkerPoolHandleCancelled.GenWithStackByArgs()) 172 } 173 174 // callers of doCancel need to check h.isCancelled first. 175 // DO NOT call doCancel multiple times on the same handle. 176 func (h *defaultEventHandle) doCancel(err error) { 177 h.worker.removeHandle(h) 178 179 if atomic.LoadInt32(&h.hasErrorHandler) == 1 { 180 h.errorHandler(err) 181 } 182 183 h.errCh <- err 184 close(h.errCh) 185 } 186 187 func (h *defaultEventHandle) ErrCh() <-chan error { 188 return h.errCh 189 } 190 191 func (h *defaultEventHandle) OnExit(f func(err error)) EventHandle { 192 atomic.StoreInt32(&h.hasErrorHandler, 0) 193 h.worker.synchronize() 194 h.errorHandler = f 195 atomic.StoreInt32(&h.hasErrorHandler, 1) 196 return h 197 } 198 199 func (h *defaultEventHandle) HashCode() int64 { 200 return h.id 201 } 202 203 func (h *defaultEventHandle) cancelWithErr(err error) { 204 if !atomic.CompareAndSwapInt32(&h.isCancelled, 0, 1) { 205 // already cancelled 206 return 207 } 208 209 h.doCancel(err) 210 } 211 212 func (h *defaultEventHandle) durationSinceLastTimer() time.Duration { 213 return time.Since(h.lastTimer) 214 } 215 216 func (h *defaultEventHandle) doTimer(ctx context.Context) error { 217 if atomic.LoadInt32(&h.hasTimer) == 0 { 218 return nil 219 } 220 221 if h.durationSinceLastTimer() < h.timerInterval { 222 return nil 223 } 224 225 err := h.timerHandler(ctx) 226 if err != nil { 227 return errors.Trace(err) 228 } 229 230 h.lastTimer = time.Now() 231 232 return nil 233 } 234 235 type task struct { 236 handle *defaultEventHandle 237 f func(ctx context.Context) error 238 } 239 240 type worker struct { 241 taskCh chan task 242 handles map[*defaultEventHandle]struct{} 243 handleRWLock sync.RWMutex 244 // A message is passed to handleCancelCh when we need to wait for the 245 // current execution of handler to finish. Should be BLOCKING. 246 handleCancelCh chan struct{} 247 // must be accessed atomically 248 isRunning int32 249 // notifies exits of run() 250 stopNotifier notify.Notifier 251 } 252 253 func newWorker() *worker { 254 return &worker{ 255 taskCh: make(chan task, 128), 256 handles: make(map[*defaultEventHandle]struct{}), 257 handleCancelCh: make(chan struct{}), // this channel must be unbuffered, i.e. blocking 258 } 259 } 260 261 func (w *worker) run(ctx context.Context) error { 262 ticker := time.NewTicker(workerPoolDefaultClockSourceInterval) 263 atomic.StoreInt32(&w.isRunning, 1) 264 defer func() { 265 ticker.Stop() 266 atomic.StoreInt32(&w.isRunning, 0) 267 w.stopNotifier.Notify() 268 }() 269 270 for { 271 select { 272 case <-ctx.Done(): 273 return errors.Trace(ctx.Err()) 274 case task := <-w.taskCh: 275 if atomic.LoadInt32(&task.handle.isCancelled) == 1 { 276 // ignored cancelled handle 277 continue 278 } 279 280 err := task.f(ctx) 281 if err != nil { 282 task.handle.cancelWithErr(err) 283 } 284 case <-ticker.C: 285 var handleErrs []struct { 286 h *defaultEventHandle 287 e error 288 } 289 290 w.handleRWLock.RLock() 291 for handle := range w.handles { 292 if atomic.LoadInt32(&handle.isCancelled) == 1 { 293 // ignored cancelled handle 294 continue 295 } 296 err := handle.doTimer(ctx) 297 if err != nil { 298 handleErrs = append(handleErrs, struct { 299 h *defaultEventHandle 300 e error 301 }{handle, err}) 302 } 303 } 304 w.handleRWLock.RUnlock() 305 306 // cancelWithErr must be called out side of the loop above, 307 // to avoid deadlock. 308 for _, handleErr := range handleErrs { 309 handleErr.h.cancelWithErr(handleErr.e) 310 } 311 case <-w.handleCancelCh: 312 } 313 } 314 } 315 316 // synchronize waits for the worker to loop at least once, or to exit. 317 func (w *worker) synchronize() { 318 if atomic.LoadInt32(&w.isRunning) == 0 { 319 return 320 } 321 322 receiver, err := w.stopNotifier.NewReceiver(time.Millisecond * 100) 323 if err != nil { 324 if cerrors.ErrOperateOnClosedNotifier.Equal(errors.Cause(err)) { 325 return 326 } 327 log.Panic("unexpected error", zap.Error(err)) 328 } 329 defer receiver.Stop() 330 331 startTime := time.Now() 332 for { 333 workerHasFinishedLoop := false 334 select { 335 case w.handleCancelCh <- struct{}{}: 336 workerHasFinishedLoop = true 337 case <-receiver.C: 338 } 339 if workerHasFinishedLoop || atomic.LoadInt32(&w.isRunning) == 0 { 340 break 341 } 342 343 if time.Since(startTime) > time.Second*10 { 344 // likely the workerpool has deadlocked, or there is a bug in the event handlers. 345 log.Warn("synchronize is taking too long, report a bug", zap.Duration("elapsed", time.Since(startTime))) 346 } 347 } 348 } 349 350 func (w *worker) addHandle(handle *defaultEventHandle) { 351 w.handleRWLock.Lock() 352 defer w.handleRWLock.Unlock() 353 354 w.handles[handle] = struct{}{} 355 } 356 357 func (w *worker) removeHandle(handle *defaultEventHandle) { 358 w.handleRWLock.Lock() 359 defer w.handleRWLock.Unlock() 360 361 delete(w.handles, handle) 362 }