github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many invalidate attempts are made
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Warn("unhandled serf event", "event", log.Fmt("%#v", e))
			}

		case <-s.shutdownCh:
			return
		}
	}
}

// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Warn("non-server in gossip pool", "member", m.Name)
			continue
		}
		s.logger.Info("adding server", "server", parts)

		// Check if this server is known
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we are still expecting to bootstrap, we may need to handle this
		if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs; if there
	// are, remove our expectation of bootstrapping. This is slightly cheaper
	// than the full check that BootstrapCluster will do, so this is a good
	// pre-filter.
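	// Read the last index from whichever Raft log store is initialized: the
	// persistent store when Raft is backed by disk, or the in-memory store
	// otherwise.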
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem nor raftStore is initialized")
	}
	if err != nil {
		s.logger.Error("failed to read last raft index", "error", err)
		return
	}

	// Bootstrap can only be done if there are no committed logs;
	// otherwise, remove our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.BootstrapExpect, 0)
		return
	}

	// Scan for all the known servers
	members := s.serf.Members()
	var servers []serverParts
	voters := 0
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
			s.logger.Error("peer has a conflicting expect value. All nodes should expect the same number", "member", member)
			return
		}
		if p.Bootstrap {
			s.logger.Error("peer has bootstrap mode. Expect disabled", "member", member)
			return
		}
		if !p.NonVoter {
			voters++
		}
		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if voters < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Error("failed to confirm peer status", "peer", server.Name, "error", err, "retry", nextRetry)
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, stop bootstrap since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK this is racy, because even with an initial bootstrap
		// as long as one peer runs bootstrap things will work, and if we
		// have multiple peers bootstrap in the same way, that's OK. We
		// just don't want a server added much later to do a live bootstrap
		// and interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Info("disabling bootstrap mode because existing Raft peers being reported by peer",
				"peer_name", server.Name, "peer_address", server.Addr)
			atomic.StoreInt32(&s.config.BootstrapExpect, 0)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
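	// Build the Raft configuration from the discovered servers. When the
	// minimum Raft protocol version across servers is 3 or higher, the
	// stable server ID is used as the Raft ID; otherwise the address
	// doubles as the ID. Non-voting servers are added with Nonvoter
	// suffrage.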
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Error("failed to read server raft versions", "error", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
		suffrage := raft.Voter
		if server.NonVoter {
			suffrage = raft.Nonvoter
		}
		peer := raft.Server{
			ID:       id,
			Address:  raft.ServerAddress(addr),
			Suffrage: suffrage,
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Info("found expected number of peers, attempting to bootstrap cluster...",
		"peers", strings.Join(addrs, ","))
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Error("failed to bootstrap cluster", "error", err)
	}

	// Bootstrapping completed, or failed for some reason; don't enter this again
	atomic.StoreInt32(&s.config.BootstrapExpect, 0)
}

// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Info("removing server", "server", parts)

		// Remove the server if known
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers in the region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
		select {
		case s.reconcileCh <- m:
		default:
		}
	}
}
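
// peerRetryDelay is an illustrative sketch (a hypothetical helper, not part
// of the original file) showing the backoff schedule maybeBootstrap follows
// between "Status.Peers" attempts: (1 << attempt) * peerRetryBase, i.e.
// 1s, 2s, 4s, 8s, 16s, 32s across the maxPeerRetries attempts.
func peerRetryDelay(attempt uint) time.Duration {
	// Double the baseline wait for each prior failed attempt.
	return (1 << attempt) * peerRetryBase
}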