github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/serf.go (about)

     1  package nomad
     2  
     3  import (
     4  	"sync/atomic"
     5  
     6  	"github.com/hashicorp/serf/serf"
     7  )
     8  
const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)
)
    14  
    15  // serfEventHandler is used to handle events from the serf cluster
    16  func (s *Server) serfEventHandler() {
    17  	for {
    18  		select {
    19  		case e := <-s.eventCh:
    20  			switch e.EventType() {
    21  			case serf.EventMemberJoin:
    22  				s.nodeJoin(e.(serf.MemberEvent))
    23  				s.localMemberEvent(e.(serf.MemberEvent))
    24  			case serf.EventMemberLeave, serf.EventMemberFailed:
    25  				s.nodeFailed(e.(serf.MemberEvent))
    26  				s.localMemberEvent(e.(serf.MemberEvent))
    27  			case serf.EventMemberUpdate, serf.EventMemberReap,
    28  				serf.EventUser, serf.EventQuery: // Ignore
    29  			default:
    30  				s.logger.Printf("[WARN] nomad: unhandled serf event: %#v", e)
    31  			}
    32  
    33  		case <-s.shutdownCh:
    34  			return
    35  		}
    36  	}
    37  }
    38  
    39  // nodeJoin is used to handle join events on the serf cluster
    40  func (s *Server) nodeJoin(me serf.MemberEvent) {
    41  	for _, m := range me.Members {
    42  		ok, parts := isNomadServer(m)
    43  		if !ok {
    44  			s.logger.Printf("[WARN] nomad: non-server in gossip pool: %s", m.Name)
    45  			continue
    46  		}
    47  		s.logger.Printf("[INFO] nomad: adding server %s", parts)
    48  
    49  		// Check if this server is known
    50  		found := false
    51  		s.peerLock.Lock()
    52  		existing := s.peers[parts.Region]
    53  		for idx, e := range existing {
    54  			if e.Name == parts.Name {
    55  				existing[idx] = parts
    56  				found = true
    57  				break
    58  			}
    59  		}
    60  
    61  		// Add ot the list if not known
    62  		if !found {
    63  			s.peers[parts.Region] = append(existing, parts)
    64  		}
    65  
    66  		// Check if a local peer
    67  		if parts.Region == s.config.Region {
    68  			s.localPeers[parts.Addr.String()] = parts
    69  		}
    70  		s.peerLock.Unlock()
    71  
    72  		// If we still expecting to bootstrap, may need to handle this
    73  		if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
    74  			s.maybeBootstrap()
    75  		}
    76  	}
    77  }
    78  
    79  // maybeBootsrap is used to handle bootstrapping when a new server joins
    80  func (s *Server) maybeBootstrap() {
    81  	var index uint64
    82  	var err error
    83  	if s.raftStore != nil {
    84  		index, err = s.raftStore.LastIndex()
    85  	} else if s.raftInmem != nil {
    86  		index, err = s.raftInmem.LastIndex()
    87  	} else {
    88  		panic("neither raftInmem or raftStore is initialized")
    89  	}
    90  	if err != nil {
    91  		s.logger.Printf("[ERR] nomad: failed to read last raft index: %v", err)
    92  		return
    93  	}
    94  
    95  	// Bootstrap can only be done if there are no committed logs,
    96  	// remove our expectations of bootstrapping
    97  	if index != 0 {
    98  		atomic.StoreInt32(&s.config.BootstrapExpect, 0)
    99  		return
   100  	}
   101  
   102  	// Scan for all the known servers
   103  	members := s.serf.Members()
   104  	addrs := make([]string, 0)
   105  	for _, member := range members {
   106  		valid, p := isNomadServer(member)
   107  		if !valid {
   108  			continue
   109  		}
   110  		if p.Region != s.config.Region {
   111  			continue
   112  		}
   113  		if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
   114  			s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member)
   115  			return
   116  		}
   117  		if p.Bootstrap {
   118  			s.logger.Printf("[ERR] nomad: peer %v has bootstrap mode. Expect disabled.", member)
   119  			return
   120  		}
   121  		addrs = append(addrs, p.Addr.String())
   122  	}
   123  
   124  	// Skip if we haven't met the minimum expect count
   125  	if len(addrs) < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
   126  		return
   127  	}
   128  
   129  	// Update the peer set
   130  	s.logger.Printf("[INFO] nomad: Attempting bootstrap with nodes: %v", addrs)
   131  	if err := s.raft.SetPeers(addrs).Error(); err != nil {
   132  		s.logger.Printf("[ERR] nomad: failed to bootstrap peers: %v", err)
   133  	}
   134  
   135  	// Bootstrapping complete, don't enter this again
   136  	atomic.StoreInt32(&s.config.BootstrapExpect, 0)
   137  }
   138  
// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Printf("[INFO] nomad: removing server %s", parts)

		// Remove the server if known. Swap the match with the last
		// element and nil the vacated slot so the removed entry is not
		// retained by the backing array, then shrink the slice by one.
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers in a region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, parts.Addr.String())
		}
		s.peerLock.Unlock()
	}
}
   175  
   176  // localMemberEvent is used to reconcile Serf events with the
   177  // consistent store if we are the current leader.
   178  func (s *Server) localMemberEvent(me serf.MemberEvent) {
   179  	// Do nothing if we are not the leader
   180  	if !s.IsLeader() {
   181  		return
   182  	}
   183  
   184  	// Check if this is a reap event
   185  	isReap := me.EventType() == serf.EventMemberReap
   186  
   187  	// Queue the members for reconciliation
   188  	for _, m := range me.Members {
   189  		// Change the status if this is a reap event
   190  		if isReap {
   191  			m.Status = StatusReap
   192  		}
   193  		select {
   194  		case s.reconcileCh <- m:
   195  		default:
   196  		}
   197  	}
   198  }