github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/serf.go (about)

     1  package nomad
     2  
     3  import "github.com/hashicorp/serf/serf"
     4  
     5  const (
     6  	// StatusReap is used to update the status of a node if we
     7  	// are handling a EventMemberReap
     8  	StatusReap = serf.MemberStatus(-1)
     9  )
    10  
    11  // serfEventHandler is used to handle events from the serf cluster
    12  func (s *Server) serfEventHandler() {
    13  	for {
    14  		select {
    15  		case e := <-s.eventCh:
    16  			switch e.EventType() {
    17  			case serf.EventMemberJoin:
    18  				s.nodeJoin(e.(serf.MemberEvent))
    19  				s.localMemberEvent(e.(serf.MemberEvent))
    20  			case serf.EventMemberLeave, serf.EventMemberFailed:
    21  				s.nodeFailed(e.(serf.MemberEvent))
    22  				s.localMemberEvent(e.(serf.MemberEvent))
    23  			case serf.EventMemberUpdate, serf.EventMemberReap,
    24  				serf.EventUser, serf.EventQuery: // Ignore
    25  			default:
    26  				s.logger.Printf("[WARN] nomad: unhandled serf event: %#v", e)
    27  			}
    28  
    29  		case <-s.shutdownCh:
    30  			return
    31  		}
    32  	}
    33  }
    34  
    35  // nodeJoin is used to handle join events on the serf cluster
    36  func (s *Server) nodeJoin(me serf.MemberEvent) {
    37  	for _, m := range me.Members {
    38  		ok, parts := isNomadServer(m)
    39  		if !ok {
    40  			s.logger.Printf("[WARN] nomad: non-server in gossip pool: %s", m.Name)
    41  			continue
    42  		}
    43  		s.logger.Printf("[INFO] nomad: adding server %s", parts)
    44  
    45  		// Check if this server is known
    46  		found := false
    47  		s.peerLock.Lock()
    48  		existing := s.peers[parts.Region]
    49  		for idx, e := range existing {
    50  			if e.Name == parts.Name {
    51  				existing[idx] = parts
    52  				found = true
    53  				break
    54  			}
    55  		}
    56  
    57  		// Add ot the list if not known
    58  		if !found {
    59  			s.peers[parts.Region] = append(existing, parts)
    60  		}
    61  
    62  		// Check if a local peer
    63  		if parts.Region == s.config.Region {
    64  			s.localPeers[parts.Addr.String()] = parts
    65  		}
    66  		s.peerLock.Unlock()
    67  
    68  		// If we still expecting to bootstrap, may need to handle this
    69  		if s.config.BootstrapExpect != 0 {
    70  			s.maybeBootstrap()
    71  		}
    72  	}
    73  }
    74  
    75  // maybeBootsrap is used to handle bootstrapping when a new consul server joins
    76  func (s *Server) maybeBootstrap() {
    77  	var index uint64
    78  	var err error
    79  	if s.raftStore != nil {
    80  		index, err = s.raftStore.LastIndex()
    81  	} else if s.raftInmem != nil {
    82  		index, err = s.raftInmem.LastIndex()
    83  	} else {
    84  		panic("neither raftInmem or raftStore is initialized")
    85  	}
    86  	if err != nil {
    87  		s.logger.Printf("[ERR] nomad: failed to read last raft index: %v", err)
    88  		return
    89  	}
    90  
    91  	// Bootstrap can only be done if there are no committed logs,
    92  	// remove our expectations of bootstrapping
    93  	if index != 0 {
    94  		s.config.BootstrapExpect = 0
    95  		return
    96  	}
    97  
    98  	// Scan for all the known servers
    99  	members := s.serf.Members()
   100  	addrs := make([]string, 0)
   101  	for _, member := range members {
   102  		valid, p := isNomadServer(member)
   103  		if !valid {
   104  			continue
   105  		}
   106  		if p.Region != s.config.Region {
   107  			continue
   108  		}
   109  		if p.Expect != 0 && p.Expect != s.config.BootstrapExpect {
   110  			s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member)
   111  			return
   112  		}
   113  		if p.Bootstrap {
   114  			s.logger.Printf("[ERR] nomad: peer %v has bootstrap mode. Expect disabled.", member)
   115  			return
   116  		}
   117  		addrs = append(addrs, p.Addr.String())
   118  	}
   119  
   120  	// Skip if we haven't met the minimum expect count
   121  	if len(addrs) < s.config.BootstrapExpect {
   122  		return
   123  	}
   124  
   125  	// Update the peer set
   126  	s.logger.Printf("[INFO] nomad: Attempting bootstrap with nodes: %v", addrs)
   127  	if err := s.raft.SetPeers(addrs).Error(); err != nil {
   128  		s.logger.Printf("[ERR] nomad: failed to bootstrap peers: %v", err)
   129  	}
   130  
   131  	// Bootstrapping comlete, don't enter this again
   132  	s.config.BootstrapExpect = 0
   133  }
   134  
   135  // nodeFailed is used to handle fail events on the serf cluster
   136  func (s *Server) nodeFailed(me serf.MemberEvent) {
   137  	for _, m := range me.Members {
   138  		ok, parts := isNomadServer(m)
   139  		if !ok {
   140  			continue
   141  		}
   142  		s.logger.Printf("[INFO] nomad: removing server %s", parts)
   143  
   144  		// Remove the server if known
   145  		s.peerLock.Lock()
   146  		existing := s.peers[parts.Region]
   147  		n := len(existing)
   148  		for i := 0; i < n; i++ {
   149  			if existing[i].Name == parts.Name {
   150  				existing[i], existing[n-1] = existing[n-1], nil
   151  				existing = existing[:n-1]
   152  				n--
   153  				break
   154  			}
   155  		}
   156  
   157  		// Trim the list there are no known servers in a region
   158  		if n == 0 {
   159  			delete(s.peers, parts.Region)
   160  		} else {
   161  			s.peers[parts.Region] = existing
   162  		}
   163  
   164  		// Check if local peer
   165  		if parts.Region == s.config.Region {
   166  			delete(s.localPeers, parts.Addr.String())
   167  		}
   168  		s.peerLock.Unlock()
   169  	}
   170  }
   171  
   172  // localMemberEvent is used to reconcile Serf events with the
   173  // consistent store if we are the current leader.
   174  func (s *Server) localMemberEvent(me serf.MemberEvent) {
   175  	// Do nothing if we are not the leader
   176  	if !s.IsLeader() {
   177  		return
   178  	}
   179  
   180  	// Check if this is a reap event
   181  	isReap := me.EventType() == serf.EventMemberReap
   182  
   183  	// Queue the members for reconciliation
   184  	for _, m := range me.Members {
   185  		// Change the status if this is a reap event
   186  		if isReap {
   187  			m.Status = StatusReap
   188  		}
   189  		select {
   190  		case s.reconcileCh <- m:
   191  		default:
   192  		}
   193  	}
   194  }