gopkg.in/hashicorp/nomad.v0@v0.11.8/nomad/serf.go

package nomad

import (
	"strings"
	"sync/atomic"
	"time"

	log "github.com/hashicorp/go-hclog"

	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

const (
	// StatusReap is used to update the status of a node if we
	// are handling an EventMemberReap
	StatusReap = serf.MemberStatus(-1)

	// maxPeerRetries limits how many times the Status.Peers check is retried
	maxPeerRetries = 6

	// peerRetryBase is a baseline retry time
	peerRetryBase = 1 * time.Second
)

// serfEventHandler is used to handle events from the serf cluster
func (s *Server) serfEventHandler() {
	for {
		select {
		case e := <-s.eventCh:
			switch e.EventType() {
			case serf.EventMemberJoin:
				s.nodeJoin(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberLeave, serf.EventMemberFailed:
				s.nodeFailed(e.(serf.MemberEvent))
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberReap:
				s.localMemberEvent(e.(serf.MemberEvent))
			case serf.EventMemberUpdate, serf.EventUser, serf.EventQuery: // Ignore
			default:
				s.logger.Warn("unhandled serf event", "event", log.Fmt("%#v", e))
			}

		case <-s.shutdownCh:
			return
		}
	}
}

// nodeJoin is used to handle join events on the serf cluster
func (s *Server) nodeJoin(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			s.logger.Warn("non-server in gossip pool", "member", m.Name)
			continue
		}
		s.logger.Info("adding server", "server", parts)

		// Check if this server is known
		found := false
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		for idx, e := range existing {
			if e.Name == parts.Name {
				existing[idx] = parts
				found = true
				break
			}
		}

		// Add to the list if not known
		if !found {
			s.peers[parts.Region] = append(existing, parts)
		}

		// Check if a local peer
		if parts.Region == s.config.Region {
			s.localPeers[raft.ServerAddress(parts.Addr.String())] = parts
		}
		s.peerLock.Unlock()

		// If we're still expecting to bootstrap, we may need to handle this
		if atomic.LoadInt32(&s.config.Bootstrapped) == 0 {
			s.maybeBootstrap()
		}
	}
}

// maybeBootstrap is used to handle bootstrapping when a new server joins
func (s *Server) maybeBootstrap() {
	// Bootstrap can only be done if there are no committed logs. Checking
	// the last Raft index is slightly cheaper than the full check that
	// BootstrapCluster will do, so this is a good pre-filter.
	var index uint64
	var err error
	if s.raftStore != nil {
		index, err = s.raftStore.LastIndex()
	} else if s.raftInmem != nil {
		index, err = s.raftInmem.LastIndex()
	} else {
		panic("neither raftInmem nor raftStore is initialized")
	}
	if err != nil {
		s.logger.Error("failed to read last raft index", "error", err)
		return
	}

	// Bootstrap can only be done if there are no committed logs; if any
	// exist, drop our expectation of bootstrapping
	if index != 0 {
		atomic.StoreInt32(&s.config.Bootstrapped, 1)
		return
	}

	// Scan for all the known servers
	members := s.serf.Members()
	var servers []serverParts
	voters := 0
	for _, member := range members {
		valid, p := isNomadServer(member)
		if !valid {
			continue
		}
		if p.Region != s.config.Region {
			continue
		}
		if p.Expect != 0 && p.Expect != s.config.BootstrapExpect {
			s.logger.Error("peer has a conflicting expect value. All nodes should expect the same number", "member", member)
			return
		}
		if p.Bootstrap {
			s.logger.Error("peer has bootstrap mode enabled. Expect disabled", "member", member)
			return
		}
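		// Only voting servers count toward the BootstrapExpect threshold.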
		if !p.NonVoter {
			voters++
		}

		servers = append(servers, *p)
	}

	// Skip if we haven't met the minimum expect count
	if voters < s.config.BootstrapExpect {
		return
	}

	// Query each of the servers and make sure they report no Raft peers.
	req := &structs.GenericRequest{
		QueryOptions: structs.QueryOptions{
			AllowStale: true,
		},
	}
	for _, server := range servers {
		var peers []string

		// Retry with exponential backoff to get peer status from this server
		for attempt := uint(0); attempt < maxPeerRetries; attempt++ {
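			// Each failed attempt doubles the wait: 1s, 2s, 4s, ... up to maxPeerRetries attempts.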
			if err := s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion,
				"Status.Peers", req, &peers); err != nil {
				nextRetry := (1 << attempt) * peerRetryBase
				s.logger.Error("failed to confirm peer status", "peer", server.Name, "error", err, "retry", nextRetry)
				time.Sleep(nextRetry)
			} else {
				break
			}
		}

		// Found a node with some Raft peers, stop bootstrap since there's
		// evidence of an existing cluster. We should get folded in by the
		// existing servers if that's the case, so it's cleaner to sit as a
		// candidate with no peers so we don't cause spurious elections.
		// It's OK that this is racy: even with an initial bootstrap, as long
		// as one peer runs bootstrap things will work, and if we have
		// multiple peers bootstrap in the same way, that's OK too. We just
		// don't want a server added much later to do a live bootstrap and
		// interfere with the cluster. This isn't required for Raft's
		// correctness because no server in the existing cluster will vote
		// for this server, but it makes things much more stable.
		if len(peers) > 0 {
			s.logger.Info("disabling bootstrap mode because existing Raft peers are being reported by peer",
				"peer_name", server.Name, "peer_address", server.Addr)
			atomic.StoreInt32(&s.config.Bootstrapped, 1)
			return
		}
	}

	// Update the peer set
	// Attempt a live bootstrap!
	var configuration raft.Configuration
	var addrs []string
	minRaftVersion, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		s.logger.Error("failed to read server raft versions", "error", err)
	}

	for _, server := range servers {
		addr := server.Addr.String()
		addrs = append(addrs, addr)
		var id raft.ServerID
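		// Raft protocol version 3 and later addresses servers by a stable
		// server ID; older versions fall back to using the address as the ID.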
		if minRaftVersion >= 3 {
			id = raft.ServerID(server.ID)
		} else {
			id = raft.ServerID(addr)
		}
		suffrage := raft.Voter
		if server.NonVoter {
			suffrage = raft.Nonvoter
		}
		peer := raft.Server{
			ID:       id,
			Address:  raft.ServerAddress(addr),
			Suffrage: suffrage,
		}
		configuration.Servers = append(configuration.Servers, peer)
	}
	s.logger.Info("found expected number of peers, attempting to bootstrap cluster...",
		"peers", strings.Join(addrs, ","))
	future := s.raft.BootstrapCluster(configuration)
	if err := future.Error(); err != nil {
		s.logger.Error("failed to bootstrap cluster", "error", err)
	}

	// Bootstrapping completed, or failed for some reason; either way, don't enter this again
	atomic.StoreInt32(&s.config.Bootstrapped, 1)
}

// nodeFailed is used to handle fail events on the serf cluster
func (s *Server) nodeFailed(me serf.MemberEvent) {
	for _, m := range me.Members {
		ok, parts := isNomadServer(m)
		if !ok {
			continue
		}
		s.logger.Info("removing server", "server", parts)

		// Remove the server if known
		s.peerLock.Lock()
		existing := s.peers[parts.Region]
		n := len(existing)
		for i := 0; i < n; i++ {
			if existing[i].Name == parts.Name {
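				// Swap the match with the last element and truncate; peer
				// order within a region does not matter.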
				existing[i], existing[n-1] = existing[n-1], nil
				existing = existing[:n-1]
				n--
				break
			}
		}

		// Trim the list if there are no known servers left in the region
		if n == 0 {
			delete(s.peers, parts.Region)
		} else {
			s.peers[parts.Region] = existing
		}

		// Check if local peer
		if parts.Region == s.config.Region {
			delete(s.localPeers, raft.ServerAddress(parts.Addr.String()))
		}
		s.peerLock.Unlock()
	}
}

// localMemberEvent is used to reconcile Serf events with the
// consistent store if we are the current leader.
func (s *Server) localMemberEvent(me serf.MemberEvent) {
	// Do nothing if we are not the leader
	if !s.IsLeader() {
		return
	}

	// Check if this is a reap event
	isReap := me.EventType() == serf.EventMemberReap

	// Queue the members for reconciliation
	for _, m := range me.Members {
		// Change the status if this is a reap event
		if isReap {
			m.Status = StatusReap
		}
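		// Non-blocking send: if the reconcile channel is full, drop the
		// member rather than blocking the serf event handler.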
		select {
		case s.reconcileCh <- m:
		default:
		}
	}
}