github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
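	// (serf's own member statuses are all non-negative, so this sentinel
	// value cannot collide with a real status)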
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many attempts are made to confirm a peer's status
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Printf("[WARN] nomad: unhandled serf event: %#v", e)
			}

		case <-s.shutdownCh:
			return
		}
	}
}

// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Printf("[WARN] nomad: non-server in gossip pool: %s", m.Name)
			continue
		}
		s.logger.Printf("[INFO] nomad: adding server %s", parts)

		// Check if this server is known
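		// (a known server is updated in place so that a changed address or
		// version is picked up)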
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we are still expecting to bootstrap, we may need to handle this
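		// (BootstrapExpect is set to zero once bootstrapping has completed
		// or been abandoned, so this becomes a no-op afterwards)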
		if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs; otherwise we
	// remove our expectation of bootstrapping. This is slightly cheaper than
	// the full check that BootstrapCluster will do, so this is a good pre-filter.
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem nor raftStore is initialized")
	}
	if err != nil {
		s.logger.Printf("[ERR] nomad: failed to read last raft index: %v", err)
		return
	}

	// Bootstrap can only be done if there are no committed logs, so a
	// non-zero index means we remove our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.BootstrapExpect, 0)
		return
	}

	// Scan for all the known servers
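	// (only servers in our own region count toward the expected quorum size)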
	members := s.serf.Members()
	var servers []serverParts
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
			s.logger.Printf("[ERR] nomad: peer %v has a conflicting expect value. All nodes should expect the same number.", member)
			return
		}
		if p.Bootstrap {
			s.logger.Printf("[ERR] nomad: peer %v has bootstrap mode. Expect disabled.", member)
			return
		}
		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if len(servers) < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
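	// AllowStale lets the query be answered by whichever server we contact
	// instead of being forwarded to a leader, since no leader may exist yet.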
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
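		// (with the 1s peerRetryBase the delays are 1s, 2s, 4s, 8s, 16s and
		// 32s across the six attempts allowed by maxPeerRetries)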
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Printf("[ERR] nomad: Failed to confirm peer status for %s: %v. Retrying in "+
					"%v...", server.Name, err, nextRetry.String())
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, stop bootstrap since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK this is racy, because even with an initial bootstrap
		// as long as one peer runs bootstrap things will work, and if we
		// have multiple peers bootstrap in the same way, that's OK. We
		// just don't want a server added much later to do a live bootstrap
		// and interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Printf("[INFO] nomad: Existing Raft peers reported by %s (%v), disabling bootstrap mode", server.Name, server.Addr)
			atomic.StoreInt32(&s.config.BootstrapExpect, 0)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Printf("[ERR] nomad: Failed to read server raft versions: %v", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
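		// (servers running Raft protocol version 3 or later are addressed by
		// their stable node ID; older versions key servers by address)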
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
		peer := raft.Server{
			ID:      id,
			Address: raft.ServerAddress(addr),
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Printf("[INFO] nomad: Found expected number of peers (%s), attempting to bootstrap cluster...",
		strings.Join(addrs, ","))
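	// BootstrapCluster refuses to run if any Raft state already exists, so a
	// concurrent bootstrap elsewhere is still safe.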
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Printf("[ERR] nomad: Failed to bootstrap cluster: %v", err)
	}

	// Bootstrapping is complete, or failed for some reason; don't enter this again
	atomic.StoreInt32(&s.config.BootstrapExpect, 0)
}

// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Printf("[INFO] nomad: removing server %s", parts)

		// Remove the server if known
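		// (the departing entry is swapped with the last element and the slice
		// truncated; ordering of the regional peer list is not significant)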
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers left in the region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
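		// Non-blocking send: if the reconcile channel is full the member is
		// dropped here and picked up by the leader's periodic reconciliation.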
		select {
		case s.reconcileCh <- m:
		default:
		}
	}
}