github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/heartbeatstop.go (about)

     1  package client
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	hclog "github.com/hashicorp/go-hclog"
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  )
    10  
// heartbeatStop holds the state used to stop allocations whose task group
// sets StopAfterClientDisconnect once the last recorded heartbeat time is too
// far in the past.
type heartbeatStop struct {
	// lastOk is the most recently recorded heartbeat time. setLastOk and
	// getLastOk access it while holding lock.
	lastOk        time.Time
	// startupGrace is set at construction to time.Now() plus the timeout
	// passed to newHeartbeatStop; shouldStopAfter consults it only while
	// lastOk is still the zero time.
	startupGrace  time.Time
	// allocInterval maps an allocation ID to that allocation's
	// StopAfterClientDisconnect duration. It is populated and pruned by watch.
	allocInterval map[string]time.Duration
	// allocHookCh carries allocations from allocHook to the watch loop.
	// It is created unbuffered, so a send blocks until watch receives.
	allocHookCh   chan *structs.Allocation
	// getRunner looks up the AllocRunner for an allocation ID; stopAlloc
	// calls Destroy on the runner it returns.
	getRunner     func(string) (AllocRunner, error)
	logger        hclog.InterceptLogger
	// shutdownCh makes watch return as soon as a receive on it succeeds.
	shutdownCh    chan struct{}
	// lock guards lastOk in setLastOk and getLastOk.
	lock          *sync.RWMutex
}
    21  
    22  func newHeartbeatStop(
    23  	getRunner func(string) (AllocRunner, error),
    24  	timeout time.Duration,
    25  	logger hclog.InterceptLogger,
    26  	shutdownCh chan struct{}) *heartbeatStop {
    27  
    28  	h := &heartbeatStop{
    29  		startupGrace:  time.Now().Add(timeout),
    30  		allocInterval: make(map[string]time.Duration),
    31  		allocHookCh:   make(chan *structs.Allocation),
    32  		getRunner:     getRunner,
    33  		logger:        logger,
    34  		shutdownCh:    shutdownCh,
    35  		lock:          &sync.RWMutex{},
    36  	}
    37  
    38  	return h
    39  }
    40  
    41  // allocHook is called after (re)storing a new AllocRunner in the client. It registers the
    42  // allocation to be stopped if the taskgroup is configured appropriately
    43  func (h *heartbeatStop) allocHook(alloc *structs.Allocation) {
    44  	tg := allocTaskGroup(alloc)
    45  	if tg.StopAfterClientDisconnect != nil {
    46  		h.allocHookCh <- alloc
    47  	}
    48  }
    49  
    50  // shouldStop is called on a restored alloc to determine if lastOk is sufficiently in the
    51  // past that it should be prevented from restarting
    52  func (h *heartbeatStop) shouldStop(alloc *structs.Allocation) bool {
    53  	tg := allocTaskGroup(alloc)
    54  	if tg.StopAfterClientDisconnect != nil {
    55  		return h.shouldStopAfter(time.Now(), *tg.StopAfterClientDisconnect)
    56  	}
    57  	return false
    58  }
    59  
    60  func (h *heartbeatStop) shouldStopAfter(now time.Time, interval time.Duration) bool {
    61  	lastOk := h.getLastOk()
    62  	if lastOk.IsZero() {
    63  		return h.startupGrace.After(now)
    64  	}
    65  	return now.After(lastOk.Add(interval))
    66  }
    67  
    68  // watch is a loop that checks for allocations that should be stopped. It also manages the
    69  // registration of allocs to be stopped in a single thread.
    70  func (h *heartbeatStop) watch() {
    71  	// If we never manage to successfully contact the server, we want to stop our allocs
    72  	// after duration + start time
    73  	h.lastOk = time.Now()
    74  	stop := make(chan string, 1)
    75  	var now time.Time
    76  	var interval time.Duration
    77  	checkAllocs := false
    78  
    79  	for {
    80  		// minimize the interval
    81  		interval = 5 * time.Second
    82  		for _, t := range h.allocInterval {
    83  			if t < interval {
    84  				interval = t
    85  			}
    86  		}
    87  
    88  		checkAllocs = false
    89  		timeout := time.After(interval)
    90  
    91  		select {
    92  		case allocID := <-stop:
    93  			if err := h.stopAlloc(allocID); err != nil {
    94  				h.logger.Warn("error stopping on heartbeat timeout", "alloc", allocID, "error", err)
    95  				continue
    96  			}
    97  			delete(h.allocInterval, allocID)
    98  
    99  		case alloc := <-h.allocHookCh:
   100  			tg := allocTaskGroup(alloc)
   101  			if tg.StopAfterClientDisconnect != nil {
   102  				h.allocInterval[alloc.ID] = *tg.StopAfterClientDisconnect
   103  			}
   104  
   105  		case <-timeout:
   106  			checkAllocs = true
   107  
   108  		case <-h.shutdownCh:
   109  			return
   110  		}
   111  
   112  		if !checkAllocs {
   113  			continue
   114  		}
   115  
   116  		now = time.Now()
   117  		for allocID, d := range h.allocInterval {
   118  			if h.shouldStopAfter(now, d) {
   119  				stop <- allocID
   120  			}
   121  		}
   122  	}
   123  }
   124  
   125  // setLastOk sets the last known good heartbeat time to the current time, and persists that time to disk
   126  func (h *heartbeatStop) setLastOk(t time.Time) {
   127  	h.lock.Lock()
   128  	defer h.lock.Unlock()
   129  	h.lastOk = t
   130  }
   131  
   132  func (h *heartbeatStop) getLastOk() time.Time {
   133  	h.lock.RLock()
   134  	defer h.lock.RUnlock()
   135  	return h.lastOk
   136  }
   137  
   138  // stopAlloc actually stops the allocation
   139  func (h *heartbeatStop) stopAlloc(allocID string) error {
   140  	runner, err := h.getRunner(allocID)
   141  	if err != nil {
   142  		return err
   143  	}
   144  
   145  	h.logger.Debug("stopping alloc for stop_after_client_disconnect", "alloc", allocID)
   146  
   147  	runner.Destroy()
   148  	return nil
   149  }
   150  
   151  func allocTaskGroup(alloc *structs.Allocation) *structs.TaskGroup {
   152  	for _, tg := range alloc.Job.TaskGroups {
   153  		if tg.Name == alloc.TaskGroup {
   154  			return tg
   155  		}
   156  	}
   157  	return nil
   158  }