github.com/hashicorp/consul@v1.4.5/agent/consul/autopilot/autopilot.go

     1  package autopilot
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"log"
     7  	"net"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/hashicorp/go-version"
    13  	"github.com/hashicorp/raft"
    14  	"github.com/hashicorp/serf/serf"
    15  )
    16  
    17  // Delegate is the interface for the Autopilot mechanism
    18  type Delegate interface {
    19  	AutopilotConfig() *Config
    20  	FetchStats(context.Context, []serf.Member) map[string]*ServerStats
    21  	IsServer(serf.Member) (*ServerInfo, error)
    22  	NotifyHealth(OperatorHealthReply)
    23  	PromoteNonVoters(*Config, OperatorHealthReply) ([]raft.Server, error)
    24  	Raft() *raft.Raft
    25  	Serf() *serf.Serf
    26  }
    27  
    28  // Autopilot is a mechanism for automatically managing the Raft
    29  // quorum using server health information along with updates from Serf gossip.
    30  // For more information, see https://www.consul.io/docs/guides/autopilot.html
    31  type Autopilot struct {
    32  	logger   *log.Logger
    33  	delegate Delegate
    34  
    35  	interval       time.Duration
    36  	healthInterval time.Duration
    37  
    38  	clusterHealth     OperatorHealthReply
    39  	clusterHealthLock sync.RWMutex
    40  
    41  	enabled      bool
    42  	removeDeadCh chan struct{}
    43  	shutdownCh   chan struct{}
    44  	shutdownLock sync.Mutex
    45  	waitGroup    sync.WaitGroup
    46  }
    47  
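        // ServerInfo holds the Autopilot-relevant details of a server, as parsed
        // from its Serf member information.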
    48  type ServerInfo struct {
    49  	Name   string
    50  	ID     string
    51  	Addr   net.Addr
    52  	Build  version.Version
    53  	Status serf.MemberStatus
    54  }
    55  
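        // NewAutopilot returns an Autopilot that uses the given delegate and runs
        // its promotion/pruning and health-check loops at the given intervals.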
    56  func NewAutopilot(logger *log.Logger, delegate Delegate, interval, healthInterval time.Duration) *Autopilot {
    57  	return &Autopilot{
    58  		logger:         logger,
    59  		delegate:       delegate,
    60  		interval:       interval,
    61  		healthInterval: healthInterval,
    62  		removeDeadCh:   make(chan struct{}),
    63  	}
    64  }
    65  
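        // Start launches the Autopilot background goroutines. It is a no-op if
        // Autopilot is already running.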
    66  func (a *Autopilot) Start() {
    67  	a.shutdownLock.Lock()
    68  	defer a.shutdownLock.Unlock()
    69  
    70  	// Nothing to do
    71  	if a.enabled {
    72  		return
    73  	}
    74  
    75  	a.shutdownCh = make(chan struct{})
    76  	a.waitGroup = sync.WaitGroup{}
    77  	a.clusterHealth = OperatorHealthReply{}
    78  
    79  	a.waitGroup.Add(2)
    80  	go a.run()
    81  	go a.serverHealthLoop()
    82  	a.enabled = true
    83  }
    84  
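        // Stop signals the background goroutines to exit and waits for them to
        // finish. It is a no-op if Autopilot is not running.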
    85  func (a *Autopilot) Stop() {
    86  	a.shutdownLock.Lock()
    87  	defer a.shutdownLock.Unlock()
    88  
    89  	// Nothing to do
    90  	if !a.enabled {
    91  		return
    92  	}
    93  
    94  	close(a.shutdownCh)
    95  	a.waitGroup.Wait()
    96  	a.enabled = false
    97  }
    98  
    99  // run periodically looks for nonvoting servers to promote and dead servers to remove.
   100  func (a *Autopilot) run() {
   101  	defer a.waitGroup.Done()
   102  
   103  	// Check for promotions and dead servers until shutdown
   104  	ticker := time.NewTicker(a.interval)
   105  	defer ticker.Stop()
   106  
   107  	for {
   108  		select {
   109  		case <-a.shutdownCh:
   110  			return
   111  		case <-ticker.C:
   112  			if err := a.promoteServers(); err != nil {
   113  				a.logger.Printf("[ERR] autopilot: Error promoting servers: %v", err)
   114  			}
   115  
   116  			if err := a.pruneDeadServers(); err != nil {
   117  				a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
   118  			}
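        		// A removal was requested out of band via RemoveDeadServers
        		// (for example, right after a promotion), so prune immediately.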
   119  		case <-a.removeDeadCh:
   120  			if err := a.pruneDeadServers(); err != nil {
   121  				a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
   122  			}
   123  		}
   124  	}
   125  }
   126  
   127  // promoteServers asks the delegate for any promotions and carries them out.
   128  func (a *Autopilot) promoteServers() error {
   129  	conf := a.delegate.AutopilotConfig()
   130  	if conf == nil {
   131  		return nil
   132  	}
   133  
   134  	// Skip the non-voter promotions unless all servers support the new APIs
   135  	minRaftProtocol, err := a.MinRaftProtocol()
   136  	if err != nil {
   137  		return fmt.Errorf("error getting server raft protocol versions: %s", err)
   138  	}
   139  	if minRaftProtocol >= 3 {
   140  		promotions, err := a.delegate.PromoteNonVoters(conf, a.GetClusterHealth())
   141  		if err != nil {
   142  			return fmt.Errorf("error checking for non-voters to promote: %s", err)
   143  		}
   144  		if err := a.handlePromotions(promotions); err != nil {
   145  			return fmt.Errorf("error handling promotions: %s", err)
   146  		}
   147  	}
   148  
   149  	return nil
   150  }
   151  
   152  // fmtServer formats info about a server in a standard way for logging.
   153  func fmtServer(server raft.Server) string {
   154  	return fmt.Sprintf("Server (ID: %q Address: %q)", server.ID, server.Address)
   155  }
   156  
   157  // NumPeers counts the number of voting peers in the given raft config.
   158  func NumPeers(raftConfig raft.Configuration) int {
   159  	var numPeers int
   160  	for _, server := range raftConfig.Servers {
   161  		if server.Suffrage == raft.Voter {
   162  			numPeers++
   163  		}
   164  	}
   165  	return numPeers
   166  }
   167  
   168  // RemoveDeadServers triggers a pruning of dead servers in a non-blocking way.
   169  func (a *Autopilot) RemoveDeadServers() {
   170  	select {
   171  	case a.removeDeadCh <- struct{}{}:
   172  	default:
   173  	}
   174  }
   175  
   176  // pruneDeadServers removes failed and stale servers, as long as fewer than numPeers/2 would be removed
   177  func (a *Autopilot) pruneDeadServers() error {
   178  	conf := a.delegate.AutopilotConfig()
   179  	if conf == nil || !conf.CleanupDeadServers {
   180  		return nil
   181  	}
   182  
   183  	// Failed servers are known to Serf and marked failed, and stale servers
   184  	// are known to Raft but not Serf.
   185  	var failed []string
   186  	staleRaftServers := make(map[string]raft.Server)
   187  	raftNode := a.delegate.Raft()
   188  	future := raftNode.GetConfiguration()
   189  	if err := future.Error(); err != nil {
   190  		return err
   191  	}
   192  
   193  	raftConfig := future.Configuration()
   194  	for _, server := range raftConfig.Servers {
   195  		staleRaftServers[string(server.Address)] = server
   196  	}
   197  
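        	// Walk the Serf members to find failed servers; anything left in
        	// staleRaftServers afterwards is known to Raft but not to Serf.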
   198  	serfLAN := a.delegate.Serf()
   199  	for _, member := range serfLAN.Members() {
   200  		server, err := a.delegate.IsServer(member)
   201  		if err != nil {
   202  			a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
   203  			continue
   204  		}
   205  		if server != nil {
   206  			// todo(kyhavlov): change this to index by UUID
   207  			s, found := staleRaftServers[server.Addr.String()]
   208  			if found {
   209  				delete(staleRaftServers, server.Addr.String())
   210  			}
   211  
   212  			if member.Status == serf.StatusFailed {
   213  				// If the node is a nonvoter, we can remove it immediately.
   214  				if found && s.Suffrage == raft.Nonvoter {
   215  					a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", member.Name)
   216  					go serfLAN.RemoveFailedNode(member.Name)
   217  				} else {
   218  					failed = append(failed, member.Name)
   219  				}
   220  			}
   221  		}
   222  	}
   223  
   224  	// We can bail early if there's nothing to do.
   225  	removalCount := len(failed) + len(staleRaftServers)
   226  	if removalCount == 0 {
   227  		return nil
   228  	}
   229  
   230  	// Only do removals if a minority of servers will be affected.
   231  	peers := NumPeers(raftConfig)
   232  	if removalCount < peers/2 {
   233  		for _, node := range failed {
   234  			a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", node)
   235  			go serfLAN.RemoveFailedNode(node)
   236  		}
   237  
   238  		minRaftProtocol, err := a.MinRaftProtocol()
   239  		if err != nil {
   240  			return err
   241  		}
   242  		for _, raftServer := range staleRaftServers {
   243  			a.logger.Printf("[INFO] autopilot: Attempting removal of stale %s", fmtServer(raftServer))
   244  			var future raft.Future
   245  			if minRaftProtocol >= 2 {
   246  				future = raftNode.RemoveServer(raftServer.ID, 0, 0)
   247  			} else {
   248  				future = raftNode.RemovePeer(raftServer.Address)
   249  			}
   250  			if err := future.Error(); err != nil {
   251  				return err
   252  			}
   253  		}
   254  	} else {
   255  		a.logger.Printf("[DEBUG] autopilot: Failed to remove dead servers: too many dead servers: %d/%d", removalCount, peers)
   256  	}
   257  
   258  	return nil
   259  }
   260  
   261  // MinRaftProtocol returns the lowest supported Raft protocol among alive servers
   262  func (a *Autopilot) MinRaftProtocol() (int, error) {
   263  	return minRaftProtocol(a.delegate.Serf().Members(), a.delegate.IsServer)
   264  }
   265  
   266  func minRaftProtocol(members []serf.Member, serverFunc func(serf.Member) (*ServerInfo, error)) (int, error) {
   267  	minVersion := -1
   268  	for _, m := range members {
   269  		if m.Status != serf.StatusAlive {
   270  			continue
   271  		}
   272  
   273  		server, err := serverFunc(m)
   274  		if err != nil {
   275  			return -1, err
   276  		}
   277  		if server == nil {
   278  			continue
   279  		}
   280  
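        		// Servers that predate the raft_vsn tag default to Raft protocol
        		// version 1.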
   281  		vsn, ok := m.Tags["raft_vsn"]
   282  		if !ok {
   283  			vsn = "1"
   284  		}
   285  		raftVsn, err := strconv.Atoi(vsn)
   286  		if err != nil {
   287  			return -1, err
   288  		}
   289  
   290  		if minVersion == -1 || raftVsn < minVersion {
   291  			minVersion = raftVsn
   292  		}
   293  	}
   294  
   295  	if minVersion == -1 {
   296  		return minVersion, fmt.Errorf("No servers found")
   297  	}
   298  
   299  	return minVersion, nil
   300  }
   301  
   302  // handlePromotions is a helper shared with Consul Enterprise that attempts to
   303  // apply desired server promotions to the Raft configuration.
   304  func (a *Autopilot) handlePromotions(promotions []raft.Server) error {
   305  	// This used to wait to promote only when it would maintain an odd
   306  	// quorum of servers, but that was at odds with the dead server cleanup
   307  	// when doing rolling updates (add one new server, wait, and then kill
   308  	// an old server). The dead server cleanup would still count the old
   309  	// server as a peer, which is conservative and the right thing to do,
   310  	// and this would wait to promote, so you could get into a stalemate.
   311  	// It is safer to promote early than to remove early, so we promote as
   312  	// soon as possible.
   313  	for _, server := range promotions {
   314  		a.logger.Printf("[INFO] autopilot: Promoting %s to voter", fmtServer(server))
   315  		addFuture := a.delegate.Raft().AddVoter(server.ID, server.Address, 0, 0)
   316  		if err := addFuture.Error(); err != nil {
   317  			return fmt.Errorf("failed to add raft peer: %v", err)
   318  		}
   319  	}
   320  
   321  	// If we promoted a server, trigger a check to remove dead servers.
   322  	if len(promotions) > 0 {
   323  		select {
   324  		case a.removeDeadCh <- struct{}{}:
   325  		default:
   326  		}
   327  	}
   328  	return nil
   329  }
   330  
   331  // serverHealthLoop monitors the health of the servers in the cluster
   332  func (a *Autopilot) serverHealthLoop() {
   333  	defer a.waitGroup.Done()
   334  
   335  	// Monitor server health until shutdown
   336  	ticker := time.NewTicker(a.healthInterval)
   337  	defer ticker.Stop()
   338  
   339  	for {
   340  		select {
   341  		case <-a.shutdownCh:
   342  			return
   343  		case <-ticker.C:
   344  			if err := a.updateClusterHealth(); err != nil {
   345  				a.logger.Printf("[ERR] autopilot: Error updating cluster health: %s", err)
   346  			}
   347  		}
   348  	}
   349  }
   350  
   351  // updateClusterHealth fetches the Raft stats of the other servers and updates
   352  // a.clusterHealth based on the configured Autopilot thresholds
   353  func (a *Autopilot) updateClusterHealth() error {
   354  	// Don't do anything if the min Raft version is too low
   355  	minRaftProtocol, err := a.MinRaftProtocol()
   356  	if err != nil {
   357  		return fmt.Errorf("error getting server raft protocol versions: %s", err)
   358  	}
   359  	if minRaftProtocol < 3 {
   360  		return nil
   361  	}
   362  
   363  	autopilotConf := a.delegate.AutopilotConfig()
   364  	// Bail early if autopilot config hasn't been initialized yet
   365  	if autopilotConf == nil {
   366  		return nil
   367  	}
   368  
   369  	// Get the Serf members which are Consul servers
   370  	var serverMembers []serf.Member
   371  	serverMap := make(map[string]*ServerInfo)
   372  	for _, member := range a.delegate.Serf().Members() {
   373  		if member.Status == serf.StatusLeft {
   374  			continue
   375  		}
   376  
   377  		server, err := a.delegate.IsServer(member)
   378  		if err != nil {
   379  			a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
   380  			continue
   381  		}
   382  		if server != nil {
   383  			serverMap[server.ID] = server
   384  			serverMembers = append(serverMembers, member)
   385  		}
   386  	}
   387  
   388  	raftNode := a.delegate.Raft()
   389  	future := raftNode.GetConfiguration()
   390  	if err := future.Error(); err != nil {
   391  		return fmt.Errorf("error getting Raft configuration: %s", err)
   392  	}
   393  	servers := future.Configuration().Servers
   394  
   395  	// Fetch the health for each of the servers in parallel so we get as
   396  	// consistent a sample as possible. We capture the leader's index
   397  	// here as well so it roughly lines up with the same point in time.
   398  	targetLastIndex := raftNode.LastIndex()
   399  	var fetchList []*ServerInfo
   400  	for _, server := range servers {
   401  		if parts, ok := serverMap[string(server.ID)]; ok {
   402  			fetchList = append(fetchList, parts)
   403  		}
   404  	}
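        	// Bound the stats fetch to half the health interval so a slow server
        	// cannot stall this pass of the loop.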
   405  	d := time.Now().Add(a.healthInterval / 2)
   406  	ctx, cancel := context.WithDeadline(context.Background(), d)
   407  	defer cancel()
   408  	fetchedStats := a.delegate.FetchStats(ctx, serverMembers)
   409  
   410  	// Build a current list of server healths
   411  	leader := raftNode.Leader()
   412  	var clusterHealth OperatorHealthReply
   413  	voterCount := 0
   414  	healthyCount := 0
   415  	healthyVoterCount := 0
   416  	for _, server := range servers {
   417  		health := ServerHealth{
   418  			ID:          string(server.ID),
   419  			Address:     string(server.Address),
   420  			Leader:      server.Address == leader,
   421  			LastContact: -1,
   422  			Voter:       server.Suffrage == raft.Voter,
   423  		}
   424  
   425  		parts, ok := serverMap[string(server.ID)]
   426  		if ok {
   427  			health.Name = parts.Name
   428  			health.SerfStatus = parts.Status
   429  			health.Version = parts.Build.String()
   430  			if stats, ok := fetchedStats[string(server.ID)]; ok {
   431  				if err := a.updateServerHealth(&health, parts, stats, autopilotConf, targetLastIndex); err != nil {
   432  					a.logger.Printf("[WARN] autopilot: Error updating server %s health: %s", fmtServer(server), err)
   433  				}
   434  			}
   435  		} else {
   436  			health.SerfStatus = serf.StatusNone
   437  		}
   438  
   439  		if health.Voter {
   440  			voterCount++
   441  		}
   442  		if health.Healthy {
   443  			healthyCount++
   444  			if health.Voter {
   445  				healthyVoterCount++
   446  			}
   447  		}
   448  
   449  		clusterHealth.Servers = append(clusterHealth.Servers, health)
   450  	}
   451  	clusterHealth.Healthy = healthyCount == len(servers)
   452  
   453  	// If we have extra healthy voters, update FailureTolerance
   454  	requiredQuorum := voterCount/2 + 1
   455  	if healthyVoterCount > requiredQuorum {
   456  		clusterHealth.FailureTolerance = healthyVoterCount - requiredQuorum
   457  	}
   458  
   459  	a.delegate.NotifyHealth(clusterHealth)
   460  
   461  	a.clusterHealthLock.Lock()
   462  	a.clusterHealth = clusterHealth
   463  	a.clusterHealthLock.Unlock()
   464  
   465  	return nil
   466  }
   467  
   468  // updateServerHealth computes the resulting health of the server based on its
   469  // fetched stats and the state of the leader.
   470  func (a *Autopilot) updateServerHealth(health *ServerHealth,
   471  	server *ServerInfo, stats *ServerStats,
   472  	autopilotConf *Config, targetLastIndex uint64) error {
   473  
   474  	health.LastTerm = stats.LastTerm
   475  	health.LastIndex = stats.LastIndex
   476  
   477  	if stats.LastContact != "never" {
   478  		var err error
   479  		health.LastContact, err = time.ParseDuration(stats.LastContact)
   480  		if err != nil {
   481  			return fmt.Errorf("error parsing last_contact duration: %s", err)
   482  		}
   483  	}
   484  
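        	// Evaluate health against the local node's last log term and the
        	// targetLastIndex captured by the caller.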
   485  	raftNode := a.delegate.Raft()
   486  	lastTerm, err := strconv.ParseUint(raftNode.Stats()["last_log_term"], 10, 64)
   487  	if err != nil {
   488  		return fmt.Errorf("error parsing last_log_term: %s", err)
   489  	}
   490  	health.Healthy = health.IsHealthy(lastTerm, targetLastIndex, autopilotConf)
   491  
   492  	// If this is a new server or the health changed, reset StableSince
   493  	lastHealth := a.GetServerHealth(server.ID)
   494  	if lastHealth == nil || lastHealth.Healthy != health.Healthy {
   495  		health.StableSince = time.Now()
   496  	} else {
   497  		health.StableSince = lastHealth.StableSince
   498  	}
   499  
   500  	return nil
   501  }
   502  
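        // GetClusterHealth returns the most recently computed cluster health.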
   503  func (a *Autopilot) GetClusterHealth() OperatorHealthReply {
   504  	a.clusterHealthLock.RLock()
   505  	defer a.clusterHealthLock.RUnlock()
   506  	return a.clusterHealth
   507  }
   508  
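        // GetServerHealth returns the latest stored health info for the server
        // with the given ID.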
   509  func (a *Autopilot) GetServerHealth(id string) *ServerHealth {
   510  	a.clusterHealthLock.RLock()
   511  	defer a.clusterHealthLock.RUnlock()
   512  	return a.clusterHealth.ServerHealth(id)
   513  }
   514  
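        // IsPotentialVoter returns true if the given suffrage indicates a current
        // voter or a server staged to become one.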
   515  func IsPotentialVoter(suffrage raft.ServerSuffrage) bool {
   516  	switch suffrage {
   517  	case raft.Voter, raft.Staging:
   518  		return true
   519  	default:
   520  		return false
   521  	}
   522  }