github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/heartbeat.go (about)

     1  package nomad
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/armon/go-metrics"
     7  	"github.com/hashicorp/nomad/nomad/structs"
     8  )
     9  
    10  // initializeHeartbeatTimers is used when a leader is newly elected to create
    11  // a new map to track heartbeat expiration and to reset all the timers from
    12  // the previously known set of timers.
    13  func (s *Server) initializeHeartbeatTimers() error {
    14  	// Scan all nodes and reset their timer
    15  	snap, err := s.fsm.State().Snapshot()
    16  	if err != nil {
    17  		return err
    18  	}
    19  
    20  	// Get an iterator over nodes
    21  	iter, err := snap.Nodes()
    22  	if err != nil {
    23  		return err
    24  	}
    25  
    26  	s.heartbeatTimersLock.Lock()
    27  	defer s.heartbeatTimersLock.Unlock()
    28  
    29  	// Handle each node
    30  	for {
    31  		raw := iter.Next()
    32  		if raw == nil {
    33  			break
    34  		}
    35  		node := raw.(*structs.Node)
    36  		if node.TerminalStatus() {
    37  			continue
    38  		}
    39  		s.resetHeartbeatTimerLocked(node.ID, s.config.FailoverHeartbeatTTL)
    40  	}
    41  	return nil
    42  }
    43  
    44  // resetHeartbeatTimer is used to reset the TTL of a heartbeat.
    45  // This can be used for new heartbeats and existing ones.
    46  func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) {
    47  	s.heartbeatTimersLock.Lock()
    48  	defer s.heartbeatTimersLock.Unlock()
    49  
    50  	// Compute the target TTL value
    51  	n := len(s.heartbeatTimers)
    52  	ttl := rateScaledInterval(s.config.MaxHeartbeatsPerSecond,
    53  		s.config.MinHeartbeatTTL, n)
    54  	ttl += randomStagger(ttl)
    55  
    56  	// Reset the TTL
    57  	s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace)
    58  	return ttl, nil
    59  }
    60  
    61  // resetHeartbeatTimerLocked is used to reset a heartbeat timer
    62  // assuming the heartbeatTimerLock is already held
    63  func (s *Server) resetHeartbeatTimerLocked(id string, ttl time.Duration) {
    64  	// Ensure a timer map exists
    65  	if s.heartbeatTimers == nil {
    66  		s.heartbeatTimers = make(map[string]*time.Timer)
    67  	}
    68  
    69  	// Renew the heartbeat timer if it exists
    70  	if timer, ok := s.heartbeatTimers[id]; ok {
    71  		timer.Reset(ttl)
    72  		return
    73  	}
    74  
    75  	// Create a new timer to track expiration of thi sheartbeat
    76  	timer := time.AfterFunc(ttl, func() {
    77  		s.invalidateHeartbeat(id)
    78  	})
    79  	s.heartbeatTimers[id] = timer
    80  }
    81  
    82  // invalidateHeartbeat is invoked when a heartbeat TTL is reached and we
    83  // need to invalidate the heartbeat.
    84  func (s *Server) invalidateHeartbeat(id string) {
    85  	defer metrics.MeasureSince([]string{"nomad", "heartbeat", "invalidate"}, time.Now())
    86  	// Clear the heartbeat timer
    87  	s.heartbeatTimersLock.Lock()
    88  	delete(s.heartbeatTimers, id)
    89  	s.heartbeatTimersLock.Unlock()
    90  	s.logger.Printf("[DEBUG] nomad.heartbeat: node '%s' TTL expired", id)
    91  
    92  	// Make a request to update the node status
    93  	req := structs.NodeUpdateStatusRequest{
    94  		NodeID: id,
    95  		Status: structs.NodeStatusDown,
    96  		WriteRequest: structs.WriteRequest{
    97  			Region: s.config.Region,
    98  		},
    99  	}
   100  	var resp structs.NodeUpdateResponse
   101  	if err := s.endpoints.Node.UpdateStatus(&req, &resp); err != nil {
   102  		s.logger.Printf("[ERR] nomad.heartbeat: update status failed: %v", err)
   103  	}
   104  }
   105  
   106  // clearHeartbeatTimer is used to clear the heartbeat time for
   107  // a single heartbeat. This is used when a heartbeat is destroyed
   108  // explicitly and no longer needed.
   109  func (s *Server) clearHeartbeatTimer(id string) error {
   110  	s.heartbeatTimersLock.Lock()
   111  	defer s.heartbeatTimersLock.Unlock()
   112  
   113  	if timer, ok := s.heartbeatTimers[id]; ok {
   114  		timer.Stop()
   115  		delete(s.heartbeatTimers, id)
   116  	}
   117  	return nil
   118  }
   119  
   120  // clearAllHeartbeatTimers is used when a leader is stepping
   121  // down and we no longer need to track any heartbeat timers.
   122  func (s *Server) clearAllHeartbeatTimers() error {
   123  	s.heartbeatTimersLock.Lock()
   124  	defer s.heartbeatTimersLock.Unlock()
   125  
   126  	for _, t := range s.heartbeatTimers {
   127  		t.Stop()
   128  	}
   129  	s.heartbeatTimers = nil
   130  	return nil
   131  }
   132  
   133  // heartbeatStats is a long running routine used to capture
   134  // the number of active heartbeats being tracked
   135  func (s *Server) heartbeatStats() {
   136  	for {
   137  		select {
   138  		case <-time.After(5 * time.Second):
   139  			s.heartbeatTimersLock.Lock()
   140  			num := len(s.heartbeatTimers)
   141  			s.heartbeatTimersLock.Unlock()
   142  			metrics.SetGauge([]string{"nomad", "heartbeat", "active"}, float32(num))
   143  
   144  		case <-s.shutdownCh:
   145  			return
   146  		}
   147  	}
   148  }