github.com/macb/etcd@v0.3.1-0.20140227003422-a60481c6b1a0/server/peer_server.go (about) 1 package server 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "encoding/json" 7 "fmt" 8 "io/ioutil" 9 "net/http" 10 "net/url" 11 "strconv" 12 "time" 13 14 "github.com/coreos/etcd/third_party/github.com/coreos/raft" 15 "github.com/coreos/etcd/third_party/github.com/gorilla/mux" 16 17 "github.com/coreos/etcd/discovery" 18 etcdErr "github.com/coreos/etcd/error" 19 "github.com/coreos/etcd/log" 20 "github.com/coreos/etcd/metrics" 21 "github.com/coreos/etcd/store" 22 ) 23 24 const ThresholdMonitorTimeout = 5 * time.Second 25 26 type PeerServerConfig struct { 27 Name string 28 Scheme string 29 URL string 30 SnapshotCount int 31 MaxClusterSize int 32 RetryTimes int 33 RetryInterval float64 34 } 35 36 type PeerServer struct { 37 Config PeerServerConfig 38 raftServer raft.Server 39 server *Server 40 joinIndex uint64 41 followersStats *raftFollowersStats 42 serverStats *raftServerStats 43 registry *Registry 44 store store.Store 45 snapConf *snapshotConf 46 47 closeChan chan bool 48 timeoutThresholdChan chan interface{} 49 50 metrics *metrics.Bucket 51 } 52 53 // TODO: find a good policy to do snapshot 54 type snapshotConf struct { 55 // Etcd will check if snapshot is need every checkingInterval 56 checkingInterval time.Duration 57 58 // The index when the last snapshot happened 59 lastIndex uint64 60 61 // If the incremental number of index since the last snapshot 62 // exceeds the snapshot Threshold, etcd will do a snapshot 63 snapshotThr uint64 64 } 65 66 func NewPeerServer(psConfig PeerServerConfig, registry *Registry, store store.Store, mb *metrics.Bucket, followersStats *raftFollowersStats, serverStats *raftServerStats) *PeerServer { 67 s := &PeerServer{ 68 Config: psConfig, 69 registry: registry, 70 store: store, 71 followersStats: followersStats, 72 serverStats: serverStats, 73 74 timeoutThresholdChan: make(chan interface{}, 1), 75 76 metrics: mb, 77 } 78 79 return s 80 } 81 82 func (s *PeerServer) SetRaftServer(raftServer raft.Server) { 83 s.snapConf = &snapshotConf{ 84 checkingInterval: time.Second * 3, 85 // this is not accurate, we will update raft to provide an api 86 lastIndex: raftServer.CommitIndex(), 87 snapshotThr: uint64(s.Config.SnapshotCount), 88 } 89 90 raftServer.AddEventListener(raft.StateChangeEventType, s.raftEventLogger) 91 raftServer.AddEventListener(raft.LeaderChangeEventType, s.raftEventLogger) 92 raftServer.AddEventListener(raft.TermChangeEventType, s.raftEventLogger) 93 raftServer.AddEventListener(raft.AddPeerEventType, s.raftEventLogger) 94 raftServer.AddEventListener(raft.RemovePeerEventType, s.raftEventLogger) 95 raftServer.AddEventListener(raft.HeartbeatIntervalEventType, s.raftEventLogger) 96 raftServer.AddEventListener(raft.ElectionTimeoutThresholdEventType, s.raftEventLogger) 97 98 raftServer.AddEventListener(raft.HeartbeatEventType, s.recordMetricEvent) 99 100 s.raftServer = raftServer 101 } 102 103 // Helper function to do discovery and return results in expected format 104 func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) { 105 peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL) 106 107 // Warn about errors coming from discovery, this isn't fatal 108 // since the user might have provided a peer list elsewhere, 109 // or there is some log in data dir. 110 if err != nil { 111 log.Warnf("Discovery encountered an error: %v", err) 112 return 113 } 114 115 for i := range peers { 116 // Strip the scheme off of the peer if it has one 117 // TODO(bp): clean this up! 118 purl, err := url.Parse(peers[i]) 119 if err == nil { 120 peers[i] = purl.Host 121 } 122 } 123 124 log.Infof("Discovery fetched back peer list: %v", peers) 125 126 return 127 } 128 129 // Try all possible ways to find clusters to join 130 // Include -discovery, -peers and log data in -data-dir 131 // 132 // Peer discovery follows this order: 133 // 1. -discovery 134 // 2. -peers 135 // 3. previous peers in -data-dir 136 func (s *PeerServer) findCluster(discoverURL string, peers []string) { 137 // Attempt cluster discovery 138 toDiscover := discoverURL != "" 139 if toDiscover { 140 discoverPeers, discoverErr := s.handleDiscovery(discoverURL) 141 // It is registered in discover url 142 if discoverErr == nil { 143 // start as a leader in a new cluster 144 if len(discoverPeers) == 0 { 145 log.Debug("This peer is starting a brand new cluster based on discover URL.") 146 s.startAsLeader() 147 } else { 148 s.startAsFollower(discoverPeers) 149 } 150 return 151 } 152 } 153 154 hasPeerList := len(peers) > 0 155 // if there is log in data dir, append previous peers to peers in config 156 // to find cluster 157 prevPeers := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name) 158 for i := 0; i < len(prevPeers); i++ { 159 u, err := url.Parse(prevPeers[i]) 160 if err != nil { 161 log.Debug("rejoin cannot parse url: ", err) 162 } 163 prevPeers[i] = u.Host 164 } 165 peers = append(peers, prevPeers...) 166 167 // if there is backup peer lists, use it to find cluster 168 if len(peers) > 0 { 169 ok := s.joinCluster(peers) 170 if !ok { 171 log.Warn("No living peers are found!") 172 } else { 173 log.Debugf("%s restart as a follower based on peers[%v]", s.Config.Name) 174 return 175 } 176 } 177 178 if !s.raftServer.IsLogEmpty() { 179 log.Debug("Entire cluster is down! %v will restart the cluster.", s.Config.Name) 180 return 181 } 182 183 if toDiscover { 184 log.Fatalf("Discovery failed, no available peers in backup list, and no log data") 185 } 186 187 if hasPeerList { 188 log.Fatalf("No available peers in backup list, and no log data") 189 } 190 191 log.Infof("This peer is starting a brand new cluster now.") 192 s.startAsLeader() 193 } 194 195 // Start the raft server 196 func (s *PeerServer) Start(snapshot bool, discoverURL string, peers []string) error { 197 // LoadSnapshot 198 if snapshot { 199 err := s.raftServer.LoadSnapshot() 200 201 if err == nil { 202 log.Debugf("%s finished load snapshot", s.Config.Name) 203 } else { 204 log.Debug(err) 205 } 206 } 207 208 s.raftServer.Start() 209 210 s.findCluster(discoverURL, peers) 211 212 s.closeChan = make(chan bool) 213 214 go s.monitorSync() 215 go s.monitorTimeoutThreshold(s.closeChan) 216 217 // open the snapshot 218 if snapshot { 219 go s.monitorSnapshot() 220 } 221 222 return nil 223 } 224 225 func (s *PeerServer) Stop() { 226 if s.closeChan != nil { 227 close(s.closeChan) 228 s.closeChan = nil 229 } 230 s.raftServer.Stop() 231 } 232 233 func (s *PeerServer) HTTPHandler() http.Handler { 234 router := mux.NewRouter() 235 236 // internal commands 237 router.HandleFunc("/name", s.NameHttpHandler) 238 router.HandleFunc("/version", s.VersionHttpHandler) 239 router.HandleFunc("/version/{version:[0-9]+}/check", s.VersionCheckHttpHandler) 240 router.HandleFunc("/upgrade", s.UpgradeHttpHandler) 241 router.HandleFunc("/join", s.JoinHttpHandler) 242 router.HandleFunc("/remove/{name:.+}", s.RemoveHttpHandler) 243 router.HandleFunc("/vote", s.VoteHttpHandler) 244 router.HandleFunc("/log", s.GetLogHttpHandler) 245 router.HandleFunc("/log/append", s.AppendEntriesHttpHandler) 246 router.HandleFunc("/snapshot", s.SnapshotHttpHandler) 247 router.HandleFunc("/snapshotRecovery", s.SnapshotRecoveryHttpHandler) 248 router.HandleFunc("/etcdURL", s.EtcdURLHttpHandler) 249 250 return router 251 } 252 253 // Retrieves the underlying Raft server. 254 func (s *PeerServer) RaftServer() raft.Server { 255 return s.raftServer 256 } 257 258 // Associates the client server with the peer server. 259 func (s *PeerServer) SetServer(server *Server) { 260 s.server = server 261 } 262 263 func (s *PeerServer) startAsLeader() { 264 // leader need to join self as a peer 265 for { 266 _, err := s.raftServer.Do(NewJoinCommand(store.MinVersion(), store.MaxVersion(), s.raftServer.Name(), s.Config.URL, s.server.URL())) 267 if err == nil { 268 break 269 } 270 } 271 log.Debugf("%s start as a leader", s.Config.Name) 272 } 273 274 func (s *PeerServer) startAsFollower(cluster []string) { 275 // start as a follower in a existing cluster 276 for i := 0; i < s.Config.RetryTimes; i++ { 277 ok := s.joinCluster(cluster) 278 if ok { 279 return 280 } 281 log.Warnf("%v is unable to join the cluster using any of the peers %v at %dth time. Retrying in %.1f seconds", s.Config.Name, cluster, i, s.Config.RetryInterval) 282 time.Sleep(time.Second * time.Duration(s.Config.RetryInterval)) 283 } 284 285 log.Fatalf("Cannot join the cluster via given peers after %x retries", s.Config.RetryTimes) 286 } 287 288 // getVersion fetches the peer version of a cluster. 289 func getVersion(t *transporter, versionURL url.URL) (int, error) { 290 resp, req, err := t.Get(versionURL.String()) 291 if err != nil { 292 return 0, err 293 } 294 defer resp.Body.Close() 295 296 t.CancelWhenTimeout(req) 297 body, err := ioutil.ReadAll(resp.Body) 298 if err != nil { 299 return 0, err 300 } 301 302 // Parse version number. 303 version, _ := strconv.Atoi(string(body)) 304 return version, nil 305 } 306 307 // Upgradable checks whether all peers in a cluster support an upgrade to the next store version. 308 func (s *PeerServer) Upgradable() error { 309 nextVersion := s.store.Version() + 1 310 for _, peerURL := range s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name) { 311 u, err := url.Parse(peerURL) 312 if err != nil { 313 return fmt.Errorf("PeerServer: Cannot parse URL: '%s' (%s)", peerURL, err) 314 } 315 316 t, _ := s.raftServer.Transporter().(*transporter) 317 checkURL := (&url.URL{Host: u.Host, Scheme: s.Config.Scheme, Path: fmt.Sprintf("/version/%d/check", nextVersion)}).String() 318 resp, _, err := t.Get(checkURL) 319 if err != nil { 320 return fmt.Errorf("PeerServer: Cannot check version compatibility: %s", u.Host) 321 } 322 if resp.StatusCode != 200 { 323 return fmt.Errorf("PeerServer: Version %d is not compatible with peer: %s", nextVersion, u.Host) 324 } 325 } 326 327 return nil 328 } 329 330 func (s *PeerServer) joinCluster(cluster []string) bool { 331 for _, peer := range cluster { 332 if len(peer) == 0 { 333 continue 334 } 335 336 err := s.joinByPeer(s.raftServer, peer, s.Config.Scheme) 337 if err == nil { 338 log.Debugf("%s joined the cluster via peer %s", s.Config.Name, peer) 339 return true 340 341 } 342 343 if _, ok := err.(etcdErr.Error); ok { 344 log.Fatal(err) 345 } 346 347 log.Warnf("Attempt to join via %s failed: %s", peer, err) 348 } 349 350 return false 351 } 352 353 // Send join requests to peer. 354 func (s *PeerServer) joinByPeer(server raft.Server, peer string, scheme string) error { 355 var b bytes.Buffer 356 357 // t must be ok 358 t, _ := server.Transporter().(*transporter) 359 360 // Our version must match the leaders version 361 versionURL := url.URL{Host: peer, Scheme: scheme, Path: "/version"} 362 version, err := getVersion(t, versionURL) 363 if err != nil { 364 return fmt.Errorf("Error during join version check: %v", err) 365 } 366 if version < store.MinVersion() || version > store.MaxVersion() { 367 return fmt.Errorf("Unable to join: cluster version is %d; version compatibility is %d - %d", version, store.MinVersion(), store.MaxVersion()) 368 } 369 370 json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL())) 371 372 joinURL := url.URL{Host: peer, Scheme: scheme, Path: "/join"} 373 374 log.Debugf("Send Join Request to %s", joinURL.String()) 375 376 resp, req, err := t.Post(joinURL.String(), &b) 377 378 for { 379 if err != nil { 380 return fmt.Errorf("Unable to join: %v", err) 381 } 382 if resp != nil { 383 defer resp.Body.Close() 384 385 t.CancelWhenTimeout(req) 386 387 if resp.StatusCode == http.StatusOK { 388 b, _ := ioutil.ReadAll(resp.Body) 389 s.joinIndex, _ = binary.Uvarint(b) 390 return nil 391 } 392 if resp.StatusCode == http.StatusTemporaryRedirect { 393 address := resp.Header.Get("Location") 394 log.Debugf("Send Join Request to %s", address) 395 json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL())) 396 resp, req, err = t.Post(address, &b) 397 398 } else if resp.StatusCode == http.StatusBadRequest { 399 log.Debug("Reach max number peers in the cluster") 400 decoder := json.NewDecoder(resp.Body) 401 err := &etcdErr.Error{} 402 decoder.Decode(err) 403 return *err 404 } else { 405 return fmt.Errorf("Unable to join") 406 } 407 } 408 409 } 410 } 411 412 func (s *PeerServer) Stats() []byte { 413 s.serverStats.LeaderInfo.Uptime = time.Now().Sub(s.serverStats.LeaderInfo.startTime).String() 414 415 // TODO: register state listener to raft to change this field 416 // rather than compare the state each time Stats() is called. 417 if s.RaftServer().State() == raft.Leader { 418 s.serverStats.LeaderInfo.Name = s.RaftServer().Name() 419 } 420 421 queue := s.serverStats.sendRateQueue 422 423 s.serverStats.SendingPkgRate, s.serverStats.SendingBandwidthRate = queue.Rate() 424 425 queue = s.serverStats.recvRateQueue 426 427 s.serverStats.RecvingPkgRate, s.serverStats.RecvingBandwidthRate = queue.Rate() 428 429 b, _ := json.Marshal(s.serverStats) 430 431 return b 432 } 433 434 func (s *PeerServer) PeerStats() []byte { 435 if s.raftServer.State() == raft.Leader { 436 b, _ := json.Marshal(s.followersStats) 437 return b 438 } 439 return nil 440 } 441 442 // raftEventLogger converts events from the Raft server into log messages. 443 func (s *PeerServer) raftEventLogger(event raft.Event) { 444 value := event.Value() 445 prevValue := event.PrevValue() 446 if value == nil { 447 value = "<nil>" 448 } 449 if prevValue == nil { 450 prevValue = "<nil>" 451 } 452 453 switch event.Type() { 454 case raft.StateChangeEventType: 455 log.Infof("%s: state changed from '%v' to '%v'.", s.Config.Name, prevValue, value) 456 case raft.TermChangeEventType: 457 log.Infof("%s: term #%v started.", s.Config.Name, value) 458 case raft.LeaderChangeEventType: 459 log.Infof("%s: leader changed from '%v' to '%v'.", s.Config.Name, prevValue, value) 460 case raft.AddPeerEventType: 461 log.Infof("%s: peer added: '%v'", s.Config.Name, value) 462 case raft.RemovePeerEventType: 463 log.Infof("%s: peer removed: '%v'", s.Config.Name, value) 464 case raft.HeartbeatIntervalEventType: 465 var name = "<unknown>" 466 if peer, ok := value.(*raft.Peer); ok { 467 name = peer.Name 468 } 469 log.Infof("%s: warning: heartbeat timed out: '%v'", s.Config.Name, name) 470 case raft.ElectionTimeoutThresholdEventType: 471 select { 472 case s.timeoutThresholdChan <- value: 473 default: 474 } 475 476 } 477 } 478 479 func (s *PeerServer) recordMetricEvent(event raft.Event) { 480 name := fmt.Sprintf("raft.event.%s", event.Type()) 481 value := event.Value().(time.Duration) 482 (*s.metrics).Timer(name).Update(value) 483 } 484 485 // logSnapshot logs about the snapshot that was taken. 486 func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) { 487 info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex) 488 489 if err != nil { 490 log.Infof("%s attempted and failed: %v", info, err) 491 } else { 492 log.Infof("%s completed", info) 493 } 494 } 495 496 func (s *PeerServer) monitorSnapshot() { 497 for { 498 time.Sleep(s.snapConf.checkingInterval) 499 currentIndex := s.RaftServer().CommitIndex() 500 count := currentIndex - s.snapConf.lastIndex 501 if uint64(count) > s.snapConf.snapshotThr { 502 err := s.raftServer.TakeSnapshot() 503 s.logSnapshot(err, currentIndex, count) 504 s.snapConf.lastIndex = currentIndex 505 } 506 } 507 } 508 509 func (s *PeerServer) monitorSync() { 510 ticker := time.Tick(time.Millisecond * 500) 511 for { 512 select { 513 case now := <-ticker: 514 if s.raftServer.State() == raft.Leader { 515 s.raftServer.Do(s.store.CommandFactory().CreateSyncCommand(now)) 516 } 517 } 518 } 519 } 520 521 // monitorTimeoutThreshold groups timeout threshold events together and prints 522 // them as a single log line. 523 func (s *PeerServer) monitorTimeoutThreshold(closeChan chan bool) { 524 for { 525 select { 526 case value := <-s.timeoutThresholdChan: 527 log.Infof("%s: warning: heartbeat near election timeout: %v", s.Config.Name, value) 528 case <-closeChan: 529 return 530 } 531 532 time.Sleep(ThresholdMonitorTimeout) 533 } 534 }