github.com/wfusion/gofusion@v1.1.14/common/infra/asynq/recoverer.go (about)

     1  // Copyright 2020 Kentaro Hibino. All rights reserved.
     2  // Use of this source code is governed by a MIT license
     3  // that can be found in the LICENSE file.
     4  
     5  package asynq
     6  
     7  import (
     8  	"context"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/base"
    13  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/errors"
    14  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/log"
    15  )
    16  
    17  type recoverer struct {
    18  	logger         *log.Logger
    19  	broker         base.Broker
    20  	retryDelayFunc RetryDelayFunc
    21  	isFailureFunc  func(error) bool
    22  
    23  	// channel to communicate back to the long running "recoverer" goroutine.
    24  	done chan struct{}
    25  
    26  	// list of queues to check for deadline.
    27  	queues []string
    28  
    29  	// poll interval.
    30  	interval time.Duration
    31  }
    32  
    33  type recovererParams struct {
    34  	logger         *log.Logger
    35  	broker         base.Broker
    36  	queues         []string
    37  	interval       time.Duration
    38  	retryDelayFunc RetryDelayFunc
    39  	isFailureFunc  func(error) bool
    40  }
    41  
    42  func newRecoverer(params recovererParams) *recoverer {
    43  	return &recoverer{
    44  		logger:         params.logger,
    45  		broker:         params.broker,
    46  		done:           make(chan struct{}),
    47  		queues:         params.queues,
    48  		interval:       params.interval,
    49  		retryDelayFunc: params.retryDelayFunc,
    50  		isFailureFunc:  params.isFailureFunc,
    51  	}
    52  }
    53  
    54  func (r *recoverer) shutdown() {
    55  	r.logger.Debug("[Common] asynq recoverer shutting down...")
    56  	// Signal the recoverer goroutine to stop polling.
    57  	r.done <- struct{}{}
    58  }
    59  
    60  func (r *recoverer) start(wg *sync.WaitGroup) {
    61  	wg.Add(1)
    62  	go func() {
    63  		defer wg.Done()
    64  		r.recover()
    65  		timer := time.NewTimer(r.interval)
    66  		for {
    67  			select {
    68  			case <-r.done:
    69  				r.logger.Debug("[Common] asynq recoverer done")
    70  				timer.Stop()
    71  				return
    72  			case <-timer.C:
    73  				r.recover()
    74  				timer.Reset(r.interval)
    75  			}
    76  		}
    77  	}()
    78  }
    79  
    80  // ErrLeaseExpired error indicates that the task failed because the worker working on the task
    81  // could not extend its lease due to missing heartbeats. The worker may have crashed or got cutoff from the network.
    82  var ErrLeaseExpired = errors.New("asynq: task lease expired")
    83  
    84  func (r *recoverer) recover() {
    85  	r.recoverLeaseExpiredTasks()
    86  	r.recoverStaleAggregationSets()
    87  }
    88  
    89  func (r *recoverer) recoverLeaseExpiredTasks() {
    90  	// Get all tasks which have expired 30 seconds ago or earlier to accommodate certain amount of clock skew.
    91  	cutoff := time.Now().Add(-30 * time.Second)
    92  	msgs, err := r.broker.ListLeaseExpired(cutoff, r.queues...)
    93  	if err != nil {
    94  		r.logger.Warnf("[Common] asynq recoverer: could not list lease expired tasks: %v", err)
    95  		return
    96  	}
    97  	for _, msg := range msgs {
    98  		if msg.Retried >= msg.Retry {
    99  			r.archive(msg, ErrLeaseExpired)
   100  		} else {
   101  			r.retry(msg, ErrLeaseExpired)
   102  		}
   103  	}
   104  }
   105  
   106  func (r *recoverer) recoverStaleAggregationSets() {
   107  	for _, qname := range r.queues {
   108  		if err := r.broker.ReclaimStaleAggregationSets(qname); err != nil {
   109  			r.logger.Warnf("[Common] asynq recoverer: could not reclaim stale aggregation sets in queue %q: %v",
   110  				qname, err)
   111  		}
   112  	}
   113  }
   114  
   115  func (r *recoverer) retry(msg *base.TaskMessage, err error) {
   116  	delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload))
   117  	retryAt := time.Now().Add(delay)
   118  	if err := r.broker.Retry(context.Background(), msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
   119  		r.logger.Warnf("[Common] asynq recoverer: could not retry lease expired task: %v", err)
   120  	}
   121  }
   122  
   123  func (r *recoverer) archive(msg *base.TaskMessage, err error) {
   124  	if err := r.broker.Archive(context.Background(), msg, err.Error()); err != nil {
   125  		r.logger.Warnf("[Common] asynq recoverer: could not move task to archive: %v", err)
   126  	}
   127  }