github.com/huiliang/nomad@v0.2.1-0.20151124023127-7a8b664699ff/nomad/serf.go (about) 1 package nomad 2 3 import "github.com/hashicorp/serf/serf" 4 5 const ( 6 // StatusReap is used to update the status of a node if we 7 // are handling a EventMemberReap 8 StatusReap = serf.MemberStatus(-1) 9 ) 10 11 // serfEventHandler is used to handle events from the serf cluster 12 func (s *Server) serfEventHandler() { 13 for { 14 select { 15 case e := <-s.eventCh: 16 switch e.EventType() { 17 case serf.EventMemberJoin: 18 s.nodeJoin(e.(serf.MemberEvent)) 19 s.localMemberEvent(e.(serf.MemberEvent)) 20 case serf.EventMemberLeave, serf.EventMemberFailed: 21 s.nodeFailed(e.(serf.MemberEvent)) 22 s.localMemberEvent(e.(serf.MemberEvent)) 23 case serf.EventMemberUpdate, serf.EventMemberReap, 24 serf.EventUser, serf.EventQuery: // Ignore 25 default: 26 s.logger.Printf("[WARN] nomad: unhandled serf event: %#v", e) 27 } 28 29 case <-s.shutdownCh: 30 return 31 } 32 } 33 } 34 35 // nodeJoin is used to handle join events on the serf cluster 36 func (s *Server) nodeJoin(me serf.MemberEvent) { 37 for _, m := range me.Members { 38 ok, parts := isNomadServer(m) 39 if !ok { 40 s.logger.Printf("[WARN] nomad: non-server in gossip pool: %s", m.Name) 41 continue 42 } 43 s.logger.Printf("[INFO] nomad: adding server %s", parts) 44 45 // Check if this server is known 46 found := false 47 s.peerLock.Lock() 48 existing := s.peers[parts.Region] 49 for idx, e := range existing { 50 if e.Name == parts.Name { 51 existing[idx] = parts 52 found = true 53 break 54 } 55 } 56 57 // Add ot the list if not known 58 if !found { 59 s.peers[parts.Region] = append(existing, parts) 60 } 61 62 // Check if a local peer 63 if parts.Region == s.config.Region { 64 s.localPeers[parts.Addr.String()] = parts 65 } 66 s.peerLock.Unlock() 67 68 // If we still expecting to bootstrap, may need to handle this 69 if s.config.BootstrapExpect != 0 { 70 s.maybeBootstrap() 71 } 72 } 73 } 74 75 // maybeBootsrap is used to handle bootstrapping when a new consul server joins 76 func (s *Server) maybeBootstrap() { 77 var index uint64 78 var err error 79 if s.raftStore != nil { 80 index, err = s.raftStore.LastIndex() 81 } else if s.raftInmem != nil { 82 index, err = s.raftInmem.LastIndex() 83 } else { 84 panic("neither raftInmem or raftStore is initialized") 85 } 86 if err != nil { 87 s.logger.Printf("[ERR] nomad: failed to read last raft index: %v", err) 88 return 89 } 90 91 // Bootstrap can only be done if there are no committed logs, 92 // remove our expectations of bootstrapping 93 if index != 0 { 94 s.config.BootstrapExpect = 0 95 return 96 } 97 98 // Scan for all the known servers 99 members := s.serf.Members() 100 addrs := make([]string, 0) 101 for _, member := range members { 102 valid, p := isNomadServer(member) 103 if !valid { 104 continue 105 } 106 if p.Region != s.config.Region { 107 continue 108 } 109 if p.Expect != 0 && p.Expect != s.config.BootstrapExpect { 110 s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member) 111 return 112 } 113 if p.Bootstrap { 114 s.logger.Printf("[ERR] nomad: peer %v has bootstrap mode. Expect disabled.", member) 115 return 116 } 117 addrs = append(addrs, p.Addr.String()) 118 } 119 120 // Skip if we haven't met the minimum expect count 121 if len(addrs) < s.config.BootstrapExpect { 122 return 123 } 124 125 // Update the peer set 126 s.logger.Printf("[INFO] nomad: Attempting bootstrap with nodes: %v", addrs) 127 if err := s.raft.SetPeers(addrs).Error(); err != nil { 128 s.logger.Printf("[ERR] nomad: failed to bootstrap peers: %v", err) 129 } 130 131 // Bootstrapping comlete, don't enter this again 132 s.config.BootstrapExpect = 0 133 } 134 135 // nodeFailed is used to handle fail events on the serf cluster 136 func (s *Server) nodeFailed(me serf.MemberEvent) { 137 for _, m := range me.Members { 138 ok, parts := isNomadServer(m) 139 if !ok { 140 continue 141 } 142 s.logger.Printf("[INFO] nomad: removing server %s", parts) 143 144 // Remove the server if known 145 s.peerLock.Lock() 146 existing := s.peers[parts.Region] 147 n := len(existing) 148 for i := 0; i < n; i++ { 149 if existing[i].Name == parts.Name { 150 existing[i], existing[n-1] = existing[n-1], nil 151 existing = existing[:n-1] 152 n-- 153 break 154 } 155 } 156 157 // Trim the list there are no known servers in a region 158 if n == 0 { 159 delete(s.peers, parts.Region) 160 } else { 161 s.peers[parts.Region] = existing 162 } 163 164 // Check if local peer 165 if parts.Region == s.config.Region { 166 delete(s.localPeers, parts.Addr.String()) 167 } 168 s.peerLock.Unlock() 169 } 170 } 171 172 // localMemberEvent is used to reconcile Serf events with the 173 // consistent store if we are the current leader. 174 func (s *Server) localMemberEvent(me serf.MemberEvent) { 175 // Do nothing if we are not the leader 176 if !s.IsLeader() { 177 return 178 } 179 180 // Check if this is a reap event 181 isReap := me.EventType() == serf.EventMemberReap 182 183 // Queue the members for reconciliation 184 for _, m := range me.Members { 185 // Change the status if this is a reap event 186 if isReap { 187 m.Status = StatusReap 188 } 189 select { 190 case s.reconcileCh <- m: 191 default: 192 } 193 } 194 }