github.com/huiliang/nomad@v0.2.1-0.20151124023127-7a8b664699ff/nomad/heartbeat.go (about) 1 package nomad 2 3 import ( 4 "time" 5 6 "github.com/armon/go-metrics" 7 "github.com/hashicorp/nomad/nomad/structs" 8 ) 9 10 // initializeHeartbeatTimers is used when a leader is newly elected to create 11 // a new map to track heartbeat expiration and to reset all the timers from 12 // the previously known set of timers. 13 func (s *Server) initializeHeartbeatTimers() error { 14 // Scan all nodes and reset their timer 15 snap, err := s.fsm.State().Snapshot() 16 if err != nil { 17 return err 18 } 19 20 // Get an iterator over nodes 21 iter, err := snap.Nodes() 22 if err != nil { 23 return err 24 } 25 26 s.heartbeatTimersLock.Lock() 27 defer s.heartbeatTimersLock.Unlock() 28 29 // Handle each node 30 for { 31 raw := iter.Next() 32 if raw == nil { 33 break 34 } 35 node := raw.(*structs.Node) 36 if node.TerminalStatus() { 37 continue 38 } 39 s.resetHeartbeatTimerLocked(node.ID, s.config.FailoverHeartbeatTTL) 40 } 41 return nil 42 } 43 44 // resetHeartbeatTimer is used to reset the TTL of a heartbeat. 45 // This can be used for new heartbeats and existing ones. 46 func (s *Server) resetHeartbeatTimer(id string) (time.Duration, error) { 47 s.heartbeatTimersLock.Lock() 48 defer s.heartbeatTimersLock.Unlock() 49 50 // Compute the target TTL value 51 n := len(s.heartbeatTimers) 52 ttl := rateScaledInterval(s.config.MaxHeartbeatsPerSecond, 53 s.config.MinHeartbeatTTL, n) 54 ttl += randomStagger(ttl) 55 56 // Reset the TTL 57 s.resetHeartbeatTimerLocked(id, ttl+s.config.HeartbeatGrace) 58 return ttl, nil 59 } 60 61 // resetHeartbeatTimerLocked is used to reset a heartbeat timer 62 // assuming the heartbeatTimerLock is already held 63 func (s *Server) resetHeartbeatTimerLocked(id string, ttl time.Duration) { 64 // Ensure a timer map exists 65 if s.heartbeatTimers == nil { 66 s.heartbeatTimers = make(map[string]*time.Timer) 67 } 68 69 // Renew the heartbeat timer if it exists 70 if timer, ok := s.heartbeatTimers[id]; ok { 71 timer.Reset(ttl) 72 return 73 } 74 75 // Create a new timer to track expiration of thi sheartbeat 76 timer := time.AfterFunc(ttl, func() { 77 s.invalidateHeartbeat(id) 78 }) 79 s.heartbeatTimers[id] = timer 80 } 81 82 // invalidateHeartbeat is invoked when a heartbeat TTL is reached and we 83 // need to invalidate the heartbeat. 84 func (s *Server) invalidateHeartbeat(id string) { 85 defer metrics.MeasureSince([]string{"nomad", "heartbeat", "invalidate"}, time.Now()) 86 // Clear the heartbeat timer 87 s.heartbeatTimersLock.Lock() 88 delete(s.heartbeatTimers, id) 89 s.heartbeatTimersLock.Unlock() 90 s.logger.Printf("[DEBUG] nomad.heartbeat: node '%s' TTL expired", id) 91 92 // Make a request to update the node status 93 req := structs.NodeUpdateStatusRequest{ 94 NodeID: id, 95 Status: structs.NodeStatusDown, 96 WriteRequest: structs.WriteRequest{ 97 Region: s.config.Region, 98 }, 99 } 100 var resp structs.NodeUpdateResponse 101 if err := s.endpoints.Node.UpdateStatus(&req, &resp); err != nil { 102 s.logger.Printf("[ERR] nomad.heartbeat: update status failed: %v", err) 103 } 104 } 105 106 // clearHeartbeatTimer is used to clear the heartbeat time for 107 // a single heartbeat. This is used when a heartbeat is destroyed 108 // explicitly and no longer needed. 109 func (s *Server) clearHeartbeatTimer(id string) error { 110 s.heartbeatTimersLock.Lock() 111 defer s.heartbeatTimersLock.Unlock() 112 113 if timer, ok := s.heartbeatTimers[id]; ok { 114 timer.Stop() 115 delete(s.heartbeatTimers, id) 116 } 117 return nil 118 } 119 120 // clearAllHeartbeatTimers is used when a leader is stepping 121 // down and we no longer need to track any heartbeat timers. 122 func (s *Server) clearAllHeartbeatTimers() error { 123 s.heartbeatTimersLock.Lock() 124 defer s.heartbeatTimersLock.Unlock() 125 126 for _, t := range s.heartbeatTimers { 127 t.Stop() 128 } 129 s.heartbeatTimers = nil 130 return nil 131 } 132 133 // heartbeatStats is a long running routine used to capture 134 // the number of active heartbeats being tracked 135 func (s *Server) heartbeatStats() { 136 for { 137 select { 138 case <-time.After(5 * time.Second): 139 s.heartbeatTimersLock.Lock() 140 num := len(s.heartbeatTimers) 141 s.heartbeatTimersLock.Unlock() 142 metrics.SetGauge([]string{"nomad", "heartbeat", "active"}, float32(num)) 143 144 case <-s.shutdownCh: 145 return 146 } 147 } 148 }