github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many invalidate attempts are made
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Printf("[WARN] nomad: unhandled serf event: %#v", e)
			}

		case <-s.shutdownCh:
			return
		}
	}
}

// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Printf("[WARN] nomad: non-server in gossip pool: %s", m.Name)
			continue
		}
		s.logger.Printf("[INFO] nomad: adding server %s", parts)

		// Check if this server is known
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we are still expecting to bootstrap, we may need to handle this
		if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs; if any
	// exist we drop our expectation of bootstrapping. This is slightly
	// cheaper than the full check that BootstrapCluster will do, so it is a
	// good pre-filter.
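	//
	// The flow below is: read the last raft index from whichever log store is
	// in use, bail out if any logs already exist, collect the other servers in
	// our region from the serf member list, confirm that none of them report
	// existing Raft peers, and only then attempt a live bootstrap.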
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem nor raftStore is initialized")
	}
	if err != nil {
		s.logger.Printf("[ERR] nomad: failed to read last raft index: %v", err)
		return
	}

	// Bootstrap can only be done if there are no committed logs; the log is
	// non-empty, so drop our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.BootstrapExpect, 0)
		return
	}

	// Scan for all the known servers
	members := s.serf.Members()
	var servers []serverParts
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
			s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member)
			return
		}
		if p.Bootstrap {
			s.logger.Printf("[ERR] nomad: peer %v has bootstrap mode. Expect disabled.", member)
			return
		}
		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if len(servers) < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Printf("[ERR] nomad: Failed to confirm peer status for %s: %v. Retrying in "+
					"%v...", server.Name, err, nextRetry.String())
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, stop bootstrap since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK that this is racy, because even with an initial bootstrap,
		// as long as one peer runs bootstrap things will work, and if we
		// have multiple peers bootstrap in the same way, that's OK. We
		// just don't want a server added much later to do a live bootstrap
		// and interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Printf("[INFO] nomad: Existing Raft peers reported by %s (%v), disabling bootstrap mode", server.Name, server.Addr)
			atomic.StoreInt32(&s.config.BootstrapExpect, 0)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
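	// Build the raft.Configuration from the servers gathered above. When every
	// server in the region speaks Raft protocol version 3 or newer, peers are
	// identified by their stable server ID; otherwise the network address
	// doubles as the ID.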
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Printf("[ERR] nomad: Failed to read server raft versions: %v", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
		peer := raft.Server{
			ID:      id,
			Address: raft.ServerAddress(addr),
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Printf("[INFO] nomad: Found expected number of peers (%s), attempting to bootstrap cluster...",
		strings.Join(addrs, ","))
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Printf("[ERR] nomad: Failed to bootstrap cluster: %v", err)
	}

	// Bootstrapping either completed or failed; either way, don't enter this path again
	atomic.StoreInt32(&s.config.BootstrapExpect, 0)
}

// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Printf("[INFO] nomad: removing server %s", parts)

		// Remove the server if known
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers left in the region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
		select {
		case s.reconcileCh <- m:
		default:
			// Non-blocking send: drop the member if the reconcile queue is full
		}
	}
}
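
// peerRetryDelay is a small illustrative helper (a sketch; the retry loop in
// maybeBootstrap computes this value inline rather than calling a function)
// showing the exponential backoff schedule used when polling Status.Peers:
// (1 << attempt) * peerRetryBase, i.e. 1s, 2s, 4s, 8s, 16s and 32s across the
// maxPeerRetries attempts.
func peerRetryDelay(attempt uint) time.Duration {
	return (1 << attempt) * peerRetryBase
}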