github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many attempts are made to query a peer's status
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Warn("unhandled serf event", "event", log.Fmt("%#v", e))
			}

		case <-s.shutdownCh:
			return
		}
	}
}

// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Warn("non-server in gossip pool", "member", m.Name)
			continue
		}
		s.logger.Info("adding server", "server", parts)

		// Check if this server is known
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we are still expecting to bootstrap, we may need to handle this
		if atomic.LoadInt32(&s.config.BootstrapExpect) != 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs, so if any
	// exist we remove our expectation of bootstrapping. This is slightly
	// cheaper than the full check that BootstrapCluster will do, so this is
	// a good pre-filter.
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem nor raftStore is initialized")
	}
	if err != nil {
		s.logger.Error("failed to read last raft index", "error", err)
		return
	}

	// Bootstrap can only be done if there are no committed logs,
	// so remove our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.BootstrapExpect, 0)
		return
	}

	// Scan for all the known servers
	members := s.serf.Members()
	var servers []serverParts
	voters := 0
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
			s.logger.Error("peer has a conflicting expect value. All nodes should expect the same number", "member", member)
			return
		}
		if p.Bootstrap {
			s.logger.Error("peer has bootstrap mode. Expect disabled", "member", member)
			return
		}
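		// Only voting servers count toward the expected bootstrap size.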
		if !p.NonVoter {
			voters++
		}
		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if voters < int(atomic.LoadInt32(&s.config.BootstrapExpect)) {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
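	// AllowStale is set so the peer-status query can be answered without
	// depending on a leader having been elected yet.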
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
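				// Back off exponentially: peerRetryBase, then 2x, 4x, ... per failed attempt.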
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Error("failed to confirm peer status", "peer", server.Name, "error", err, "retry", nextRetry)
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, stop bootstrap since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK that this is racy, because even with an initial bootstrap
		// as long as one peer runs bootstrap things will work, and if we
		// have multiple peers bootstrap in the same way, that's OK. We
		// just don't want a server added much later to do a live bootstrap
		// and interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Info("disabling bootstrap mode because existing Raft peers are being reported by a peer",
				"peer_name", server.Name, "peer_address", server.Addr)
			atomic.StoreInt32(&s.config.BootstrapExpect, 0)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Error("failed to read server raft versions", "error", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
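		// Raft protocol version 3 and later supports stable server IDs; older
		// versions identify servers by their network address.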
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
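		// Non-voters receive replicated logs but do not count toward quorum
		// or vote in leader elections.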
		suffrage := raft.Voter
		if server.NonVoter {
			suffrage = raft.Nonvoter
		}
		peer := raft.Server{
			ID:       id,
			Address:  raft.ServerAddress(addr),
			Suffrage: suffrage,
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Info("found expected number of peers, attempting to bootstrap cluster...",
		"peers", strings.Join(addrs, ","))
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Error("failed to bootstrap cluster", "error", err)
	}

	// Bootstrapping has either completed or failed; either way, don't enter
	// this path again
	atomic.StoreInt32(&s.config.BootstrapExpect, 0)
}

// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Info("removing server", "server", parts)

		// Remove the server if known
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
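				// Remove by swapping with the last element and truncating the
				// slice; ordering of the peer list is not preserved.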
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the map entry if there are no known servers left in the region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
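		// Non-blocking send: if the reconcile channel is full, drop the
		// member rather than stalling the serf event loop.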
		select {
		case s.reconcileCh <- m:
		default:
		}
	}
}