gopkg.in/hashicorp/nomad.v0@v0.11.8/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many invalidate attempts are made
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Warn("unhandled serf event", "event", log.Fmt("%#v", e))
			}

		case <-s.shutdownCh:
			return
		}
	}
}
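// The snippet below is an illustrative sketch, not part of the original
// serf.go: it shows how the event channel drained by serfEventHandler is
// typically wired up when the gossip layer is created. The exact wiring lives
// elsewhere in the server setup code; the names used here (eventCh, conf) are
// assumptions for illustration only.
func exampleSerfWiring(eventCh chan serf.Event) (*serf.Serf, error) {
	conf := serf.DefaultConfig()
	// Serf delivers member join/leave/fail/reap events on this channel;
	// serfEventHandler (above) drains it until the server shuts down.
	conf.EventCh = eventCh
	return serf.Create(conf)
}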
// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Warn("non-server in gossip pool", "member", m.Name)
			continue
		}
		s.logger.Info("adding server", "server", parts)

		// Check if this server is known
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we're still expecting to bootstrap, we may need to handle this
		if atomic.LoadInt32(&s.config.Bootstrapped) == 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs; if any exist,
	// remove our expectation of bootstrapping. This is slightly cheaper than
	// the full check that BootstrapCluster will do, so it is a good pre-filter.
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem or raftStore is initialized")
	}
	if err != nil {
		s.logger.Error("failed to read last raft index", "error", err)
		return
	}

	// There are committed logs, so bootstrap is no longer possible;
	// remove our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.Bootstrapped, 1)
		return
	}

	// Scan for all the known servers
	members := s.serf.Members()
	var servers []serverParts
	voters := 0
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != s.config.BootstrapExpect {
			s.logger.Error("peer has a conflicting expect value. All nodes should expect the same number", "member", member)
			return
		}
		if p.Bootstrap {
			s.logger.Error("peer has bootstrap mode. Expect disabled", "member", member)
			return
		}
		if !p.NonVoter {
			voters++
		}

		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if voters < s.config.BootstrapExpect {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Error("failed to confirm peer status", "peer", server.Name, "error", err, "retry", nextRetry)
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, so stop bootstrapping since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK that this is racy, because even with an initial bootstrap
		// as long as one peer runs bootstrap things will work, and if we
		// have multiple peers bootstrap in the same way, that's OK. We
		// just don't want a server added much later to do a live bootstrap
		// and interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Info("disabling bootstrap mode because existing Raft peers being reported by peer",
				"peer_name", server.Name, "peer_address", server.Addr)
			atomic.StoreInt32(&s.config.Bootstrapped, 1)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Error("failed to read server raft versions", "error", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
		suffrage := raft.Voter
		if server.NonVoter {
			suffrage = raft.Nonvoter
		}
		peer := raft.Server{
			ID:       id,
			Address:  raft.ServerAddress(addr),
			Suffrage: suffrage,
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Info("found expected number of peers, attempting to bootstrap cluster...",
		"peers", strings.Join(addrs, ","))
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Error("failed to bootstrap cluster", "error", err)
	}

	// Bootstrapping complete, or failed for some reason; don't enter this again
	atomic.StoreInt32(&s.config.Bootstrapped, 1)
}
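// The helper below is an illustrative sketch, not part of the original
// serf.go: it enumerates the exponential backoff schedule used above when
// confirming peer status, i.e. (1 << attempt) * peerRetryBase for attempts
// 0..maxPeerRetries-1, which works out to 1s, 2s, 4s, 8s, 16s, and 32s.
func examplePeerRetrySchedule() []time.Duration {
	waits := make([]time.Duration, 0, maxPeerRetries)
	for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
		// Same arithmetic as the retry loop in maybeBootstrap.
		waits = append(waits, (1<<attempt)*peerRetryBase)
	}
	return waits
}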
// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Info("removing server", "server", parts)

		// Remove the server if known
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers in a region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
		select {
		case s.reconcileCh <- m:
		default:
		}
	}
}
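// The function below is an illustrative sketch, not part of the original
// serf.go: it shows how a consumer of the reconcile channel fed by
// localMemberEvent might distinguish reaped members, since that function
// overwrites Status with the synthetic StatusReap value before queueing.
// The real handling is done by the leader; this consumer and its logging are
// assumptions for illustration only.
func exampleReconcileConsumer(reconcileCh <-chan serf.Member, logger log.Logger) {
	for m := range reconcileCh {
		switch m.Status {
		case StatusReap:
			// Reaped members have been fully removed from the gossip pool.
			logger.Debug("member reaped", "member", m.Name)
		case serf.StatusFailed, serf.StatusLeft:
			logger.Debug("member gone", "member", m.Name)
		default:
			logger.Debug("member alive or updated", "member", m.Name)
		}
	}
}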