github.com/wfusion/gofusion@v1.1.14/common/infra/asynq/heartbeat.go (about)

     1  // Copyright 2020 Kentaro Hibino. All rights reserved.
     2  // Use of this source code is governed by a MIT license
     3  // that can be found in the LICENSE file.
     4  
     5  package asynq
     6  
     7  import (
     8  	"os"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/google/uuid"
    13  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/base"
    14  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/log"
    15  	"github.com/wfusion/gofusion/common/infra/asynq/pkg/timeutil"
    16  )
    17  
// heartbeater is responsible for writing process info to redis periodically to
// indicate that the background worker process is up.
//
// On every beat it also asks the broker to extend the leases of the tasks
// that are currently being processed, and calls NotifyExpiration on any lease
// that is no longer valid. A heartbeater is built by newHeartbeater, run by
// start and stopped by shutdown.
type heartbeater struct {
	// Collaborators: logger receives the debug/warning/error messages, broker
	// is where server state is written and cleared and where leases are
	// extended, and clock supplies the timestamp stored in started.
	logger *log.Logger
	broker base.Broker
	clock  timeutil.Clock

	// channel to communicate back to the long running "heartbeater" goroutine.
	// shutdown sends on it; it is created unbuffered, so that send blocks
	// until the goroutine receives the value.
	done chan struct{}

	// interval between heartbeats. Server state is written with a TTL of
	// twice this value.
	interval time.Duration

	// following fields are initialized at construction time and are immutable.
	// host, pid and serverID identify this server instance: the OS host name
	// (or "unknown-host" when it cannot be read), the process ID, and a UUID
	// string generated in newHeartbeater. concurrency, queues and
	// strictPriority are taken from heartbeaterParams and reported unchanged
	// in the server info.
	host           string
	pid            int
	serverID       string
	concurrency    int
	queues         map[string]int
	strictPriority bool

	// following fields are mutable and should be accessed only by the
	// heartbeater goroutine. In other words, confine these variables
	// to this goroutine only.
	// started is the time the goroutine began running; workers holds the
	// active workers keyed by task message ID.
	started time.Time
	workers map[string]*workerInfo

	// state is shared with other goroutine but is concurrency safe.
	// beat reads it while holding state.mu.
	state *serverState

	// channels to receive updates on active workers.
	// A value received on starting is added to workers; a message received on
	// finished removes the entry with the same ID.
	starting <-chan *workerInfo
	finished <-chan *base.TaskMessage
}
    52  
// heartbeaterParams bundles the arguments accepted by newHeartbeater.
// Every field is copied as-is into the heartbeater field of the same name.
type heartbeaterParams struct {
	logger         *log.Logger
	broker         base.Broker
	interval       time.Duration
	concurrency    int
	queues         map[string]int
	strictPriority bool
	state          *serverState
	starting       <-chan *workerInfo
	finished       <-chan *base.TaskMessage
}
    64  
    65  func newHeartbeater(params heartbeaterParams) *heartbeater {
    66  	host, err := os.Hostname()
    67  	if err != nil {
    68  		host = "unknown-host"
    69  	}
    70  
    71  	return &heartbeater{
    72  		logger:   params.logger,
    73  		broker:   params.broker,
    74  		clock:    timeutil.NewRealClock(),
    75  		done:     make(chan struct{}),
    76  		interval: params.interval,
    77  
    78  		host:           host,
    79  		pid:            os.Getpid(),
    80  		serverID:       uuid.New().String(),
    81  		concurrency:    params.concurrency,
    82  		queues:         params.queues,
    83  		strictPriority: params.strictPriority,
    84  
    85  		state:    params.state,
    86  		workers:  make(map[string]*workerInfo),
    87  		starting: params.starting,
    88  		finished: params.finished,
    89  	}
    90  }
    91  
    92  func (h *heartbeater) shutdown() {
    93  	h.logger.Debug("[Common] asynq heartbeater shutting down...")
    94  	// Signal the heartbeater goroutine to stop.
    95  	h.done <- struct{}{}
    96  }
    97  
// A workerInfo holds an active worker information.
//
// The heartbeater receives these on its starting channel and tracks them,
// keyed by msg.ID, until a message with the same ID arrives on its finished
// channel.
type workerInfo struct {
	// the task message the worker is processing.
	msg *base.TaskMessage
	// the time the worker has started processing the message.
	started time.Time
	// deadline the worker has to finish processing the task by.
	deadline time.Time
	// lease the worker holds for the task. The heartbeater extends it on each
	// beat while it is still valid and calls NotifyExpiration once it is not.
	lease *base.Lease
}
   109  
   110  func (h *heartbeater) start(wg *sync.WaitGroup) {
   111  	wg.Add(1)
   112  	go func() {
   113  		defer wg.Done()
   114  
   115  		h.started = h.clock.Now()
   116  
   117  		h.beat()
   118  
   119  		timer := time.NewTimer(h.interval)
   120  		for {
   121  			select {
   122  			case <-h.done:
   123  				_ = h.broker.ClearServerState(h.host, h.pid, h.serverID)
   124  				h.logger.Debug("[Common] asynq heartbeater done")
   125  				timer.Stop()
   126  				return
   127  
   128  			case <-timer.C:
   129  				h.beat()
   130  				timer.Reset(h.interval)
   131  
   132  			case w := <-h.starting:
   133  				h.workers[w.msg.ID] = w
   134  
   135  			case msg := <-h.finished:
   136  				delete(h.workers, msg.ID)
   137  			}
   138  		}
   139  	}()
   140  }
   141  
   142  // beat extends lease for workers and writes server/worker info to redis.
   143  func (h *heartbeater) beat() {
   144  	h.state.mu.Lock()
   145  	srvStatus := h.state.value.String()
   146  	h.state.mu.Unlock()
   147  
   148  	info := base.ServerInfo{
   149  		Host:              h.host,
   150  		PID:               h.pid,
   151  		ServerID:          h.serverID,
   152  		Concurrency:       h.concurrency,
   153  		Queues:            h.queues,
   154  		StrictPriority:    h.strictPriority,
   155  		Status:            srvStatus,
   156  		Started:           h.started,
   157  		ActiveWorkerCount: len(h.workers),
   158  	}
   159  
   160  	var ws []*base.WorkerInfo
   161  	idsByQueue := make(map[string][]string)
   162  	for id, w := range h.workers {
   163  		ws = append(ws, &base.WorkerInfo{
   164  			Host:     h.host,
   165  			PID:      h.pid,
   166  			ServerID: h.serverID,
   167  			ID:       id,
   168  			Type:     w.msg.Type,
   169  			Queue:    w.msg.Queue,
   170  			Payload:  w.msg.Payload,
   171  			Started:  w.started,
   172  			Deadline: w.deadline,
   173  		})
   174  		// Check lease before adding to the set to make sure not to extend the lease if the lease is already expired.
   175  		if w.lease.IsValid() {
   176  			idsByQueue[w.msg.Queue] = append(idsByQueue[w.msg.Queue], id)
   177  		} else {
   178  			w.lease.NotifyExpiration() // notify processor if the lease is expired
   179  		}
   180  	}
   181  
   182  	// Note: Set TTL to be long enough so that it won't expire before we write again
   183  	// and short enough to expire quickly once the process is shut down or killed.
   184  	if err := h.broker.WriteServerState(&info, ws, h.interval*2); err != nil {
   185  		h.logger.Errorf("[Common] asynq failed to write server state data: %v", err)
   186  	}
   187  
   188  	for qname, ids := range idsByQueue {
   189  		expirationTime, err := h.broker.ExtendLease(qname, ids...)
   190  		if err != nil {
   191  			h.logger.Errorf("[Common] asynq failed to extend lease for tasks %v: %v", ids, err)
   192  			continue
   193  		}
   194  		for _, id := range ids {
   195  			if l := h.workers[id].lease; !l.Reset(expirationTime) {
   196  				h.logger.Warnf("[Common] asynq lease reset failed for %s; lease deadline: %v", id, l.Deadline())
   197  			}
   198  		}
   199  	}
   200  }