github.com/wfusion/gofusion@v1.1.14/common/infra/asynq/recoverer.go (about) 1 // Copyright 2020 Kentaro Hibino. All rights reserved. 2 // Use of this source code is governed by a MIT license 3 // that can be found in the LICENSE file. 4 5 package asynq 6 7 import ( 8 "context" 9 "sync" 10 "time" 11 12 "github.com/wfusion/gofusion/common/infra/asynq/pkg/base" 13 "github.com/wfusion/gofusion/common/infra/asynq/pkg/errors" 14 "github.com/wfusion/gofusion/common/infra/asynq/pkg/log" 15 ) 16 17 type recoverer struct { 18 logger *log.Logger 19 broker base.Broker 20 retryDelayFunc RetryDelayFunc 21 isFailureFunc func(error) bool 22 23 // channel to communicate back to the long running "recoverer" goroutine. 24 done chan struct{} 25 26 // list of queues to check for deadline. 27 queues []string 28 29 // poll interval. 30 interval time.Duration 31 } 32 33 type recovererParams struct { 34 logger *log.Logger 35 broker base.Broker 36 queues []string 37 interval time.Duration 38 retryDelayFunc RetryDelayFunc 39 isFailureFunc func(error) bool 40 } 41 42 func newRecoverer(params recovererParams) *recoverer { 43 return &recoverer{ 44 logger: params.logger, 45 broker: params.broker, 46 done: make(chan struct{}), 47 queues: params.queues, 48 interval: params.interval, 49 retryDelayFunc: params.retryDelayFunc, 50 isFailureFunc: params.isFailureFunc, 51 } 52 } 53 54 func (r *recoverer) shutdown() { 55 r.logger.Debug("[Common] asynq recoverer shutting down...") 56 // Signal the recoverer goroutine to stop polling. 57 r.done <- struct{}{} 58 } 59 60 func (r *recoverer) start(wg *sync.WaitGroup) { 61 wg.Add(1) 62 go func() { 63 defer wg.Done() 64 r.recover() 65 timer := time.NewTimer(r.interval) 66 for { 67 select { 68 case <-r.done: 69 r.logger.Debug("[Common] asynq recoverer done") 70 timer.Stop() 71 return 72 case <-timer.C: 73 r.recover() 74 timer.Reset(r.interval) 75 } 76 } 77 }() 78 } 79 80 // ErrLeaseExpired error indicates that the task failed because the worker working on the task 81 // could not extend its lease due to missing heartbeats. The worker may have crashed or got cutoff from the network. 82 var ErrLeaseExpired = errors.New("asynq: task lease expired") 83 84 func (r *recoverer) recover() { 85 r.recoverLeaseExpiredTasks() 86 r.recoverStaleAggregationSets() 87 } 88 89 func (r *recoverer) recoverLeaseExpiredTasks() { 90 // Get all tasks which have expired 30 seconds ago or earlier to accommodate certain amount of clock skew. 91 cutoff := time.Now().Add(-30 * time.Second) 92 msgs, err := r.broker.ListLeaseExpired(cutoff, r.queues...) 93 if err != nil { 94 r.logger.Warnf("[Common] asynq recoverer: could not list lease expired tasks: %v", err) 95 return 96 } 97 for _, msg := range msgs { 98 if msg.Retried >= msg.Retry { 99 r.archive(msg, ErrLeaseExpired) 100 } else { 101 r.retry(msg, ErrLeaseExpired) 102 } 103 } 104 } 105 106 func (r *recoverer) recoverStaleAggregationSets() { 107 for _, qname := range r.queues { 108 if err := r.broker.ReclaimStaleAggregationSets(qname); err != nil { 109 r.logger.Warnf("[Common] asynq recoverer: could not reclaim stale aggregation sets in queue %q: %v", 110 qname, err) 111 } 112 } 113 } 114 115 func (r *recoverer) retry(msg *base.TaskMessage, err error) { 116 delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload)) 117 retryAt := time.Now().Add(delay) 118 if err := r.broker.Retry(context.Background(), msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil { 119 r.logger.Warnf("[Common] asynq recoverer: could not retry lease expired task: %v", err) 120 } 121 } 122 123 func (r *recoverer) archive(msg *base.TaskMessage, err error) { 124 if err := r.broker.Archive(context.Background(), msg, err.Error()); err != nil { 125 r.logger.Warnf("[Common] asynq recoverer: could not move task to archive: %v", err) 126 } 127 }