github.phpd.cn/hashicorp/consul@v1.4.5/agent/consul/autopilot/autopilot.go

package autopilot

import (
	"context"
	"fmt"
	"log"
	"net"
	"strconv"
	"sync"
	"time"

	"github.com/hashicorp/go-version"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
)

// Delegate is the interface for the Autopilot mechanism
type Delegate interface {
	AutopilotConfig() *Config
	FetchStats(context.Context, []serf.Member) map[string]*ServerStats
	IsServer(serf.Member) (*ServerInfo, error)
	NotifyHealth(OperatorHealthReply)
	PromoteNonVoters(*Config, OperatorHealthReply) ([]raft.Server, error)
	Raft() *raft.Raft
	Serf() *serf.Serf
}

// Autopilot is a mechanism for automatically managing the Raft
// quorum using server health information along with updates from Serf gossip.
// For more information, see https://www.consul.io/docs/guides/autopilot.html
type Autopilot struct {
	logger   *log.Logger
	delegate Delegate

	interval       time.Duration
	healthInterval time.Duration

	clusterHealth     OperatorHealthReply
	clusterHealthLock sync.RWMutex

	enabled      bool
	removeDeadCh chan struct{}
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
	waitGroup    sync.WaitGroup
}

// ServerInfo holds the identity and status details Autopilot tracks for a
// single server, as derived from its Serf member information.
type ServerInfo struct {
	Name   string
	ID     string
	Addr   net.Addr
	Build  version.Version
	Status serf.MemberStatus
}

// NewAutopilot creates an Autopilot that checks for promotions and dead
// servers every interval and refreshes cluster health every healthInterval.
func NewAutopilot(logger *log.Logger, delegate Delegate, interval, healthInterval time.Duration) *Autopilot {
	return &Autopilot{
		logger:         logger,
		delegate:       delegate,
		interval:       interval,
		healthInterval: healthInterval,
		removeDeadCh:   make(chan struct{}),
	}
}

// Start launches the background goroutines if they are not already running.
func (a *Autopilot) Start() {
	a.shutdownLock.Lock()
	defer a.shutdownLock.Unlock()

	// Nothing to do
	if a.enabled {
		return
	}

	a.shutdownCh = make(chan struct{})
	a.waitGroup = sync.WaitGroup{}
	a.clusterHealth = OperatorHealthReply{}

	a.waitGroup.Add(2)
	go a.run()
	go a.serverHealthLoop()
	a.enabled = true
}

// Stop signals the background goroutines to exit and waits for them.
func (a *Autopilot) Stop() {
	a.shutdownLock.Lock()
	defer a.shutdownLock.Unlock()

	// Nothing to do
	if !a.enabled {
		return
	}

	close(a.shutdownCh)
	a.waitGroup.Wait()
	a.enabled = false
}

// run periodically looks for nonvoting servers to promote and dead servers to remove.
func (a *Autopilot) run() {
	defer a.waitGroup.Done()

	// Monitor server health until shutdown
	ticker := time.NewTicker(a.interval)
	defer ticker.Stop()

	for {
		select {
		case <-a.shutdownCh:
			return
		case <-ticker.C:
			if err := a.promoteServers(); err != nil {
				a.logger.Printf("[ERR] autopilot: Error promoting servers: %v", err)
			}

			if err := a.pruneDeadServers(); err != nil {
				a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
			}
		case <-a.removeDeadCh:
			if err := a.pruneDeadServers(); err != nil {
				a.logger.Printf("[ERR] autopilot: Error checking for dead servers to remove: %s", err)
			}
		}
	}
}
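
// Usage sketch (illustrative, not part of the original file): the Raft leader
// wires this up through a concrete Delegate implementation (the delegate and
// interval values below are hypothetical) and drives the lifecycle with
// Start/Stop:
//
//	ap := NewAutopilot(logger, delegate, 10*time.Second, 30*time.Second)
//	ap.Start()      // spawns run() and serverHealthLoop()
//	defer ap.Stop() // closes shutdownCh and waits for both goroutines to exit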

// promoteServers asks the delegate for any promotions and carries them out.
func (a *Autopilot) promoteServers() error {
	conf := a.delegate.AutopilotConfig()
	if conf == nil {
		return nil
	}

	// Skip the non-voter promotions unless all servers support the new APIs
	minRaftProtocol, err := a.MinRaftProtocol()
	if err != nil {
		return fmt.Errorf("error getting server raft protocol versions: %s", err)
	}
	if minRaftProtocol >= 3 {
		promotions, err := a.delegate.PromoteNonVoters(conf, a.GetClusterHealth())
		if err != nil {
			return fmt.Errorf("error checking for non-voters to promote: %s", err)
		}
		if err := a.handlePromotions(promotions); err != nil {
			return fmt.Errorf("error handling promotions: %s", err)
		}
	}

	return nil
}

// fmtServer prints info about a server in a standard way for logging.
func fmtServer(server raft.Server) string {
	return fmt.Sprintf("Server (ID: %q Address: %q)", server.ID, server.Address)
}

// NumPeers counts the number of voting peers in the given raft config.
func NumPeers(raftConfig raft.Configuration) int {
	var numPeers int
	for _, server := range raftConfig.Servers {
		if server.Suffrage == raft.Voter {
			numPeers++
		}
	}
	return numPeers
}

// RemoveDeadServers triggers a pruning of dead servers in a non-blocking way.
func (a *Autopilot) RemoveDeadServers() {
	select {
	case a.removeDeadCh <- struct{}{}:
	default:
	}
}

// pruneDeadServers removes failed and stale servers, as long as fewer than
// numPeers/2 of them would be removed.
func (a *Autopilot) pruneDeadServers() error {
	conf := a.delegate.AutopilotConfig()
	if conf == nil || !conf.CleanupDeadServers {
		return nil
	}

	// Failed servers are known to Serf and marked failed, and stale servers
	// are known to Raft but not Serf.
	var failed []string
	staleRaftServers := make(map[string]raft.Server)
	raftNode := a.delegate.Raft()
	future := raftNode.GetConfiguration()
	if err := future.Error(); err != nil {
		return err
	}

	raftConfig := future.Configuration()
	for _, server := range raftConfig.Servers {
		staleRaftServers[string(server.Address)] = server
	}

	serfLAN := a.delegate.Serf()
	for _, member := range serfLAN.Members() {
		server, err := a.delegate.IsServer(member)
		if err != nil {
			a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
			continue
		}
		if server != nil {
			// todo(kyhavlov): change this to index by UUID
			s, found := staleRaftServers[server.Addr.String()]
			if found {
				delete(staleRaftServers, server.Addr.String())
			}

			if member.Status == serf.StatusFailed {
				// If the node is a nonvoter, we can remove it immediately.
				if found && s.Suffrage == raft.Nonvoter {
					a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", member.Name)
					go serfLAN.RemoveFailedNode(member.Name)
				} else {
					failed = append(failed, member.Name)
				}
			}
		}
	}

	// We can bail early if there's nothing to do.
	removalCount := len(failed) + len(staleRaftServers)
	if removalCount == 0 {
		return nil
	}

	// Only do removals if a minority of servers will be affected.
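	// Worked example of the check below (illustrative): with 5 voting peers,
	// peers/2 == 2, so at most one server can be removed per pass; with only
	// 3 voting peers, peers/2 == 1 and removalCount is never < 1 here (the
	// zero case returned above), so nothing is pruned automatically.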
	peers := NumPeers(raftConfig)
	if removalCount < peers/2 {
		for _, node := range failed {
			a.logger.Printf("[INFO] autopilot: Attempting removal of failed server node %q", node)
			go serfLAN.RemoveFailedNode(node)
		}

		minRaftProtocol, err := a.MinRaftProtocol()
		if err != nil {
			return err
		}
		for _, raftServer := range staleRaftServers {
			a.logger.Printf("[INFO] autopilot: Attempting removal of stale %s", fmtServer(raftServer))
			var future raft.Future
			if minRaftProtocol >= 2 {
				future = raftNode.RemoveServer(raftServer.ID, 0, 0)
			} else {
				future = raftNode.RemovePeer(raftServer.Address)
			}
			if err := future.Error(); err != nil {
				return err
			}
		}
	} else {
		a.logger.Printf("[DEBUG] autopilot: Failed to remove dead servers: too many dead servers: %d/%d", removalCount, peers)
	}

	return nil
}

// MinRaftProtocol returns the lowest supported Raft protocol among alive servers.
func (a *Autopilot) MinRaftProtocol() (int, error) {
	return minRaftProtocol(a.delegate.Serf().Members(), a.delegate.IsServer)
}

func minRaftProtocol(members []serf.Member, serverFunc func(serf.Member) (*ServerInfo, error)) (int, error) {
	minVersion := -1
	for _, m := range members {
		if m.Status != serf.StatusAlive {
			continue
		}

		server, err := serverFunc(m)
		if err != nil {
			return -1, err
		}
		if server == nil {
			continue
		}

		vsn, ok := m.Tags["raft_vsn"]
		if !ok {
			vsn = "1"
		}
		raftVsn, err := strconv.Atoi(vsn)
		if err != nil {
			return -1, err
		}

		if minVersion == -1 || raftVsn < minVersion {
			minVersion = raftVsn
		}
	}

	if minVersion == -1 {
		return minVersion, fmt.Errorf("No servers found")
	}

	return minVersion, nil
}
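
// Example of the version math above (illustrative): given two alive server
// members tagged raft_vsn "3" and "2", the minimum is 2; a member with no
// raft_vsn tag is treated as protocol 1.
//
//	members := []serf.Member{
//		{Name: "a", Status: serf.StatusAlive, Tags: map[string]string{"raft_vsn": "3"}},
//		{Name: "b", Status: serf.StatusAlive, Tags: map[string]string{"raft_vsn": "2"}},
//	}
//	isServer := func(m serf.Member) (*ServerInfo, error) {
//		return &ServerInfo{Name: m.Name}, nil // treat every member as a server
//	}
//	min, _ := minRaftProtocol(members, isServer) // min == 2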

// handlePromotions is a helper shared with Consul Enterprise that attempts to
// apply desired server promotions to the Raft configuration.
func (a *Autopilot) handlePromotions(promotions []raft.Server) error {
	// This used to wait to only promote to maintain an odd quorum of
	// servers, but this was at odds with the dead server cleanup when doing
	// rolling updates (add one new server, wait, and then kill an old
	// server). The dead server cleanup would still count the old server as
	// a peer, which is conservative and the right thing to do, and this
	// would wait to promote, so you could get into a stalemate. It is safer
	// to promote early than remove early, so by promoting as soon as
	// possible we have chosen that as the solution here.
	for _, server := range promotions {
		a.logger.Printf("[INFO] autopilot: Promoting %s to voter", fmtServer(server))
		addFuture := a.delegate.Raft().AddVoter(server.ID, server.Address, 0, 0)
		if err := addFuture.Error(); err != nil {
			return fmt.Errorf("failed to add raft peer: %v", err)
		}
	}

	// If we promoted a server, trigger a check to remove dead servers.
	if len(promotions) > 0 {
		select {
		case a.removeDeadCh <- struct{}{}:
		default:
		}
	}
	return nil
}

// serverHealthLoop monitors the health of the servers in the cluster.
func (a *Autopilot) serverHealthLoop() {
	defer a.waitGroup.Done()

	// Monitor server health until shutdown
	ticker := time.NewTicker(a.healthInterval)
	defer ticker.Stop()

	for {
		select {
		case <-a.shutdownCh:
			return
		case <-ticker.C:
			if err := a.updateClusterHealth(); err != nil {
				a.logger.Printf("[ERR] autopilot: Error updating cluster health: %s", err)
			}
		}
	}
}
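
// Worked example for the failure-tolerance arithmetic in updateClusterHealth
// below (illustrative): with voterCount == 5, requiredQuorum == 5/2+1 == 3;
// if all five voters are healthy, FailureTolerance == 5-3 == 2, i.e. two
// voters can fail before the cluster loses quorum.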

// updateClusterHealth fetches the Raft stats of the other servers and updates
// a.clusterHealth based on the configured Autopilot thresholds.
func (a *Autopilot) updateClusterHealth() error {
	// Don't do anything if the min Raft version is too low
	minRaftProtocol, err := a.MinRaftProtocol()
	if err != nil {
		return fmt.Errorf("error getting server raft protocol versions: %s", err)
	}
	if minRaftProtocol < 3 {
		return nil
	}

	autopilotConf := a.delegate.AutopilotConfig()
	// Bail early if autopilot config hasn't been initialized yet
	if autopilotConf == nil {
		return nil
	}

	// Get the Serf members which are Consul servers
	var serverMembers []serf.Member
	serverMap := make(map[string]*ServerInfo)
	for _, member := range a.delegate.Serf().Members() {
		if member.Status == serf.StatusLeft {
			continue
		}

		server, err := a.delegate.IsServer(member)
		if err != nil {
			a.logger.Printf("[INFO] autopilot: Error parsing server info for %q: %s", member.Name, err)
			continue
		}
		if server != nil {
			serverMap[server.ID] = server
			serverMembers = append(serverMembers, member)
		}
	}

	raftNode := a.delegate.Raft()
	future := raftNode.GetConfiguration()
	if err := future.Error(); err != nil {
		return fmt.Errorf("error getting Raft configuration: %s", err)
	}
	servers := future.Configuration().Servers

	// Fetch the health for each of the servers in parallel so we get as
	// consistent of a sample as possible. We capture the leader's index
	// here as well so it roughly lines up with the same point in time.
	targetLastIndex := raftNode.LastIndex()
	// NOTE: fetchList is built here but never consumed below; FetchStats
	// operates on serverMembers directly.
	var fetchList []*ServerInfo
	for _, server := range servers {
		if parts, ok := serverMap[string(server.ID)]; ok {
			fetchList = append(fetchList, parts)
		}
	}
	d := time.Now().Add(a.healthInterval / 2)
	ctx, cancel := context.WithDeadline(context.Background(), d)
	defer cancel()
	fetchedStats := a.delegate.FetchStats(ctx, serverMembers)

	// Build a current list of server healths
	leader := raftNode.Leader()
	var clusterHealth OperatorHealthReply
	voterCount := 0
	healthyCount := 0
	healthyVoterCount := 0
	for _, server := range servers {
		health := ServerHealth{
			ID:          string(server.ID),
			Address:     string(server.Address),
			Leader:      server.Address == leader,
			LastContact: -1,
			Voter:       server.Suffrage == raft.Voter,
		}

		parts, ok := serverMap[string(server.ID)]
		if ok {
			health.Name = parts.Name
			health.SerfStatus = parts.Status
			health.Version = parts.Build.String()
			if stats, ok := fetchedStats[string(server.ID)]; ok {
				if err := a.updateServerHealth(&health, parts, stats, autopilotConf, targetLastIndex); err != nil {
					a.logger.Printf("[WARN] autopilot: Error updating server %s health: %s", fmtServer(server), err)
				}
			}
		} else {
			health.SerfStatus = serf.StatusNone
		}

		if health.Voter {
			voterCount++
		}
		if health.Healthy {
			healthyCount++
			if health.Voter {
				healthyVoterCount++
			}
		}

		clusterHealth.Servers = append(clusterHealth.Servers, health)
	}
	clusterHealth.Healthy = healthyCount == len(servers)

	// If we have extra healthy voters, update FailureTolerance
	requiredQuorum := voterCount/2 + 1
	if healthyVoterCount > requiredQuorum {
		clusterHealth.FailureTolerance = healthyVoterCount - requiredQuorum
	}

	a.delegate.NotifyHealth(clusterHealth)

	a.clusterHealthLock.Lock()
	a.clusterHealth = clusterHealth
	a.clusterHealthLock.Unlock()

	return nil
}
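
// Illustrative stats shape consumed below (assumed values, inferred from how
// the fields are used here): a healthy follower's ServerStats might carry
// LastContact "72.4ms", LastTerm 5, and LastIndex 103; a LastContact of
// "never" means the server has not yet heard from a leader, so
// health.LastContact stays at -1.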

// updateServerHealth computes the resulting health of the server based on its
// fetched stats and the state of the leader.
func (a *Autopilot) updateServerHealth(health *ServerHealth,
	server *ServerInfo, stats *ServerStats,
	autopilotConf *Config, targetLastIndex uint64) error {

	health.LastTerm = stats.LastTerm
	health.LastIndex = stats.LastIndex

	if stats.LastContact != "never" {
		var err error
		health.LastContact, err = time.ParseDuration(stats.LastContact)
		if err != nil {
			return fmt.Errorf("error parsing last_contact duration: %s", err)
		}
	}

	raftNode := a.delegate.Raft()
	lastTerm, err := strconv.ParseUint(raftNode.Stats()["last_log_term"], 10, 64)
	if err != nil {
		return fmt.Errorf("error parsing last_log_term: %s", err)
	}
	health.Healthy = health.IsHealthy(lastTerm, targetLastIndex, autopilotConf)

	// If this is a new server or the health changed, reset StableSince
	lastHealth := a.GetServerHealth(server.ID)
	if lastHealth == nil || lastHealth.Healthy != health.Healthy {
		health.StableSince = time.Now()
	} else {
		health.StableSince = lastHealth.StableSince
	}

	return nil
}

// GetClusterHealth returns the most recently computed cluster health.
func (a *Autopilot) GetClusterHealth() OperatorHealthReply {
	a.clusterHealthLock.RLock()
	defer a.clusterHealthLock.RUnlock()
	return a.clusterHealth
}

// GetServerHealth returns the most recently computed health for the server
// with the given Raft ID.
func (a *Autopilot) GetServerHealth(id string) *ServerHealth {
	a.clusterHealthLock.RLock()
	defer a.clusterHealthLock.RUnlock()
	return a.clusterHealth.ServerHealth(id)
}

// IsPotentialVoter reports whether the given Raft suffrage means the server
// either is a voter or is staging to become one.
func IsPotentialVoter(suffrage raft.ServerSuffrage) bool {
	switch suffrage {
	case raft.Voter, raft.Staging:
		return true
	default:
		return false
	}
}
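
// Usage sketch (illustrative): promotion logic can gate a non-voter on
// sustained health using GetServerHealth and StableSince; the
// conf.ServerStabilizationTime field is an Autopilot config knob assumed
// here, not defined in this file:
//
//	h := ap.GetServerHealth(id)
//	stable := h != nil && h.Healthy &&
//		time.Since(h.StableSince) >= conf.ServerStabilizationTime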