github.com/macb/etcd@v0.3.1-0.20140227003422-a60481c6b1a0/server/peer_server.go (about)

     1  package server
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"encoding/json"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net/http"
    10  	"net/url"
    11  	"strconv"
    12  	"time"
    13  
    14  	"github.com/coreos/etcd/third_party/github.com/coreos/raft"
    15  	"github.com/coreos/etcd/third_party/github.com/gorilla/mux"
    16  
    17  	"github.com/coreos/etcd/discovery"
    18  	etcdErr "github.com/coreos/etcd/error"
    19  	"github.com/coreos/etcd/log"
    20  	"github.com/coreos/etcd/metrics"
    21  	"github.com/coreos/etcd/store"
    22  )
    23  
// ThresholdMonitorTimeout is how long monitorTimeoutThreshold pauses after
// logging a timeout-threshold warning before it handles the next one.
const ThresholdMonitorTimeout = 5 * time.Second
    25  
// PeerServerConfig carries the static settings of a PeerServer.
type PeerServerConfig struct {
	// Name identifies this peer; it is used in log messages and passed to
	// discovery and to the registry when looking up peer URLs.
	Name           string
	// Scheme is the URL scheme used when building request URLs to peers.
	Scheme         string
	// URL is this peer's own URL; it is sent to discovery and embedded in
	// join commands.
	URL            string
	// SnapshotCount becomes the snapshot threshold (see snapshotConf.snapshotThr).
	SnapshotCount  int
	// MaxClusterSize is not read in this file.
	// NOTE(review): presumably the cap on cluster members; confirm at the call sites.
	MaxClusterSize int
	// RetryTimes is the number of join attempts startAsFollower makes.
	RetryTimes     int
	// RetryInterval is the pause between join attempts, in seconds.
	RetryInterval  float64
}
    35  
// PeerServer ties a raft server to the peer-facing HTTP endpoints and to the
// background monitors started by Start.
type PeerServer struct {
	Config		PeerServerConfig
	// raftServer is attached through SetRaftServer.
	raftServer	raft.Server
	// server is the client-facing server attached through SetServer; its URL
	// is embedded in join commands.
	server		*Server
	// joinIndex is decoded from the body of a successful /join response.
	joinIndex	uint64
	// followersStats is what PeerStats serializes.
	followersStats	*raftFollowersStats
	// serverStats is what Stats updates and serializes.
	serverStats	*raftServerStats
	// registry supplies the URLs of known peers.
	registry	*Registry
	store		store.Store
	// snapConf is created in SetRaftServer and read by monitorSnapshot.
	snapConf	*snapshotConf

	// closeChan is created by Start and closed by Stop.
	closeChan		chan bool
	// timeoutThresholdChan has capacity 1; raftEventLogger does a
	// non-blocking send into it and monitorTimeoutThreshold drains it.
	timeoutThresholdChan	chan interface{}

	// metrics receives the timers updated by recordMetricEvent.
	metrics	*metrics.Bucket
}
    52  
// snapshotConf holds the state monitorSnapshot uses to decide when to take
// a snapshot.
//
// TODO: find a good policy to do snapshot
type snapshotConf struct {
	// Etcd will check whether a snapshot is needed every checkingInterval
	checkingInterval	time.Duration

	// The index when the last snapshot happened
	lastIndex	uint64

	// If the number of indexes committed since the last snapshot
	// exceeds the snapshot threshold, etcd will take a snapshot
	snapshotThr	uint64
}
    65  
    66  func NewPeerServer(psConfig PeerServerConfig, registry *Registry, store store.Store, mb *metrics.Bucket, followersStats *raftFollowersStats, serverStats *raftServerStats) *PeerServer {
    67  	s := &PeerServer{
    68  		Config:		psConfig,
    69  		registry:	registry,
    70  		store:		store,
    71  		followersStats:	followersStats,
    72  		serverStats:	serverStats,
    73  
    74  		timeoutThresholdChan:	make(chan interface{}, 1),
    75  
    76  		metrics:	mb,
    77  	}
    78  
    79  	return s
    80  }
    81  
    82  func (s *PeerServer) SetRaftServer(raftServer raft.Server) {
    83  	s.snapConf = &snapshotConf{
    84  		checkingInterval:	time.Second * 3,
    85  		// this is not accurate, we will update raft to provide an api
    86  		lastIndex:	raftServer.CommitIndex(),
    87  		snapshotThr:	uint64(s.Config.SnapshotCount),
    88  	}
    89  
    90  	raftServer.AddEventListener(raft.StateChangeEventType, s.raftEventLogger)
    91  	raftServer.AddEventListener(raft.LeaderChangeEventType, s.raftEventLogger)
    92  	raftServer.AddEventListener(raft.TermChangeEventType, s.raftEventLogger)
    93  	raftServer.AddEventListener(raft.AddPeerEventType, s.raftEventLogger)
    94  	raftServer.AddEventListener(raft.RemovePeerEventType, s.raftEventLogger)
    95  	raftServer.AddEventListener(raft.HeartbeatIntervalEventType, s.raftEventLogger)
    96  	raftServer.AddEventListener(raft.ElectionTimeoutThresholdEventType, s.raftEventLogger)
    97  
    98  	raftServer.AddEventListener(raft.HeartbeatEventType, s.recordMetricEvent)
    99  
   100  	s.raftServer = raftServer
   101  }
   102  
   103  // Helper function to do discovery and return results in expected format
   104  func (s *PeerServer) handleDiscovery(discoverURL string) (peers []string, err error) {
   105  	peers, err = discovery.Do(discoverURL, s.Config.Name, s.Config.URL)
   106  
   107  	// Warn about errors coming from discovery, this isn't fatal
   108  	// since the user might have provided a peer list elsewhere,
   109  	// or there is some log in data dir.
   110  	if err != nil {
   111  		log.Warnf("Discovery encountered an error: %v", err)
   112  		return
   113  	}
   114  
   115  	for i := range peers {
   116  		// Strip the scheme off of the peer if it has one
   117  		// TODO(bp): clean this up!
   118  		purl, err := url.Parse(peers[i])
   119  		if err == nil {
   120  			peers[i] = purl.Host
   121  		}
   122  	}
   123  
   124  	log.Infof("Discovery fetched back peer list: %v", peers)
   125  
   126  	return
   127  }
   128  
   129  // Try all possible ways to find clusters to join
   130  // Include -discovery, -peers and log data in -data-dir
   131  //
   132  // Peer discovery follows this order:
   133  // 1. -discovery
   134  // 2. -peers
   135  // 3. previous peers in -data-dir
   136  func (s *PeerServer) findCluster(discoverURL string, peers []string) {
   137  	// Attempt cluster discovery
   138  	toDiscover := discoverURL != ""
   139  	if toDiscover {
   140  		discoverPeers, discoverErr := s.handleDiscovery(discoverURL)
   141  		// It is registered in discover url
   142  		if discoverErr == nil {
   143  			// start as a leader in a new cluster
   144  			if len(discoverPeers) == 0 {
   145  				log.Debug("This peer is starting a brand new cluster based on discover URL.")
   146  				s.startAsLeader()
   147  			} else {
   148  				s.startAsFollower(discoverPeers)
   149  			}
   150  			return
   151  		}
   152  	}
   153  
   154  	hasPeerList := len(peers) > 0
   155  	// if there is log in data dir, append previous peers to peers in config
   156  	// to find cluster
   157  	prevPeers := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name)
   158  	for i := 0; i < len(prevPeers); i++ {
   159  		u, err := url.Parse(prevPeers[i])
   160  		if err != nil {
   161  			log.Debug("rejoin cannot parse url: ", err)
   162  		}
   163  		prevPeers[i] = u.Host
   164  	}
   165  	peers = append(peers, prevPeers...)
   166  
   167  	// if there is backup peer lists, use it to find cluster
   168  	if len(peers) > 0 {
   169  		ok := s.joinCluster(peers)
   170  		if !ok {
   171  			log.Warn("No living peers are found!")
   172  		} else {
   173  			log.Debugf("%s restart as a follower based on peers[%v]", s.Config.Name)
   174  			return
   175  		}
   176  	}
   177  
   178  	if !s.raftServer.IsLogEmpty() {
   179  		log.Debug("Entire cluster is down! %v will restart the cluster.", s.Config.Name)
   180  		return
   181  	}
   182  
   183  	if toDiscover {
   184  		log.Fatalf("Discovery failed, no available peers in backup list, and no log data")
   185  	}
   186  
   187  	if hasPeerList {
   188  		log.Fatalf("No available peers in backup list, and no log data")
   189  	}
   190  
   191  	log.Infof("This peer is starting a brand new cluster now.")
   192  	s.startAsLeader()
   193  }
   194  
   195  // Start the raft server
   196  func (s *PeerServer) Start(snapshot bool, discoverURL string, peers []string) error {
   197  	// LoadSnapshot
   198  	if snapshot {
   199  		err := s.raftServer.LoadSnapshot()
   200  
   201  		if err == nil {
   202  			log.Debugf("%s finished load snapshot", s.Config.Name)
   203  		} else {
   204  			log.Debug(err)
   205  		}
   206  	}
   207  
   208  	s.raftServer.Start()
   209  
   210  	s.findCluster(discoverURL, peers)
   211  
   212  	s.closeChan = make(chan bool)
   213  
   214  	go s.monitorSync()
   215  	go s.monitorTimeoutThreshold(s.closeChan)
   216  
   217  	// open the snapshot
   218  	if snapshot {
   219  		go s.monitorSnapshot()
   220  	}
   221  
   222  	return nil
   223  }
   224  
   225  func (s *PeerServer) Stop() {
   226  	if s.closeChan != nil {
   227  		close(s.closeChan)
   228  		s.closeChan = nil
   229  	}
   230  	s.raftServer.Stop()
   231  }
   232  
   233  func (s *PeerServer) HTTPHandler() http.Handler {
   234  	router := mux.NewRouter()
   235  
   236  	// internal commands
   237  	router.HandleFunc("/name", s.NameHttpHandler)
   238  	router.HandleFunc("/version", s.VersionHttpHandler)
   239  	router.HandleFunc("/version/{version:[0-9]+}/check", s.VersionCheckHttpHandler)
   240  	router.HandleFunc("/upgrade", s.UpgradeHttpHandler)
   241  	router.HandleFunc("/join", s.JoinHttpHandler)
   242  	router.HandleFunc("/remove/{name:.+}", s.RemoveHttpHandler)
   243  	router.HandleFunc("/vote", s.VoteHttpHandler)
   244  	router.HandleFunc("/log", s.GetLogHttpHandler)
   245  	router.HandleFunc("/log/append", s.AppendEntriesHttpHandler)
   246  	router.HandleFunc("/snapshot", s.SnapshotHttpHandler)
   247  	router.HandleFunc("/snapshotRecovery", s.SnapshotRecoveryHttpHandler)
   248  	router.HandleFunc("/etcdURL", s.EtcdURLHttpHandler)
   249  
   250  	return router
   251  }
   252  
// RaftServer returns the underlying Raft server, i.e. the value last passed
// to SetRaftServer.
func (s *PeerServer) RaftServer() raft.Server {
	return s.raftServer
}
   257  
// SetServer associates the client server with the peer server. The client
// server's URL is embedded in the join commands this peer sends.
func (s *PeerServer) SetServer(server *Server) {
	s.server = server
}
   262  
   263  func (s *PeerServer) startAsLeader() {
   264  	// leader need to join self as a peer
   265  	for {
   266  		_, err := s.raftServer.Do(NewJoinCommand(store.MinVersion(), store.MaxVersion(), s.raftServer.Name(), s.Config.URL, s.server.URL()))
   267  		if err == nil {
   268  			break
   269  		}
   270  	}
   271  	log.Debugf("%s start as a leader", s.Config.Name)
   272  }
   273  
   274  func (s *PeerServer) startAsFollower(cluster []string) {
   275  	// start as a follower in a existing cluster
   276  	for i := 0; i < s.Config.RetryTimes; i++ {
   277  		ok := s.joinCluster(cluster)
   278  		if ok {
   279  			return
   280  		}
   281  		log.Warnf("%v is unable to join the cluster using any of the peers %v at %dth time. Retrying in %.1f seconds", s.Config.Name, cluster, i, s.Config.RetryInterval)
   282  		time.Sleep(time.Second * time.Duration(s.Config.RetryInterval))
   283  	}
   284  
   285  	log.Fatalf("Cannot join the cluster via given peers after %x retries", s.Config.RetryTimes)
   286  }
   287  
   288  // getVersion fetches the peer version of a cluster.
   289  func getVersion(t *transporter, versionURL url.URL) (int, error) {
   290  	resp, req, err := t.Get(versionURL.String())
   291  	if err != nil {
   292  		return 0, err
   293  	}
   294  	defer resp.Body.Close()
   295  
   296  	t.CancelWhenTimeout(req)
   297  	body, err := ioutil.ReadAll(resp.Body)
   298  	if err != nil {
   299  		return 0, err
   300  	}
   301  
   302  	// Parse version number.
   303  	version, _ := strconv.Atoi(string(body))
   304  	return version, nil
   305  }
   306  
   307  // Upgradable checks whether all peers in a cluster support an upgrade to the next store version.
   308  func (s *PeerServer) Upgradable() error {
   309  	nextVersion := s.store.Version() + 1
   310  	for _, peerURL := range s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name) {
   311  		u, err := url.Parse(peerURL)
   312  		if err != nil {
   313  			return fmt.Errorf("PeerServer: Cannot parse URL: '%s' (%s)", peerURL, err)
   314  		}
   315  
   316  		t, _ := s.raftServer.Transporter().(*transporter)
   317  		checkURL := (&url.URL{Host: u.Host, Scheme: s.Config.Scheme, Path: fmt.Sprintf("/version/%d/check", nextVersion)}).String()
   318  		resp, _, err := t.Get(checkURL)
   319  		if err != nil {
   320  			return fmt.Errorf("PeerServer: Cannot check version compatibility: %s", u.Host)
   321  		}
   322  		if resp.StatusCode != 200 {
   323  			return fmt.Errorf("PeerServer: Version %d is not compatible with peer: %s", nextVersion, u.Host)
   324  		}
   325  	}
   326  
   327  	return nil
   328  }
   329  
   330  func (s *PeerServer) joinCluster(cluster []string) bool {
   331  	for _, peer := range cluster {
   332  		if len(peer) == 0 {
   333  			continue
   334  		}
   335  
   336  		err := s.joinByPeer(s.raftServer, peer, s.Config.Scheme)
   337  		if err == nil {
   338  			log.Debugf("%s joined the cluster via peer %s", s.Config.Name, peer)
   339  			return true
   340  
   341  		}
   342  
   343  		if _, ok := err.(etcdErr.Error); ok {
   344  			log.Fatal(err)
   345  		}
   346  
   347  		log.Warnf("Attempt to join via %s failed: %s", peer, err)
   348  	}
   349  
   350  	return false
   351  }
   352  
   353  // Send join requests to peer.
   354  func (s *PeerServer) joinByPeer(server raft.Server, peer string, scheme string) error {
   355  	var b bytes.Buffer
   356  
   357  	// t must be ok
   358  	t, _ := server.Transporter().(*transporter)
   359  
   360  	// Our version must match the leaders version
   361  	versionURL := url.URL{Host: peer, Scheme: scheme, Path: "/version"}
   362  	version, err := getVersion(t, versionURL)
   363  	if err != nil {
   364  		return fmt.Errorf("Error during join version check: %v", err)
   365  	}
   366  	if version < store.MinVersion() || version > store.MaxVersion() {
   367  		return fmt.Errorf("Unable to join: cluster version is %d; version compatibility is %d - %d", version, store.MinVersion(), store.MaxVersion())
   368  	}
   369  
   370  	json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))
   371  
   372  	joinURL := url.URL{Host: peer, Scheme: scheme, Path: "/join"}
   373  
   374  	log.Debugf("Send Join Request to %s", joinURL.String())
   375  
   376  	resp, req, err := t.Post(joinURL.String(), &b)
   377  
   378  	for {
   379  		if err != nil {
   380  			return fmt.Errorf("Unable to join: %v", err)
   381  		}
   382  		if resp != nil {
   383  			defer resp.Body.Close()
   384  
   385  			t.CancelWhenTimeout(req)
   386  
   387  			if resp.StatusCode == http.StatusOK {
   388  				b, _ := ioutil.ReadAll(resp.Body)
   389  				s.joinIndex, _ = binary.Uvarint(b)
   390  				return nil
   391  			}
   392  			if resp.StatusCode == http.StatusTemporaryRedirect {
   393  				address := resp.Header.Get("Location")
   394  				log.Debugf("Send Join Request to %s", address)
   395  				json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))
   396  				resp, req, err = t.Post(address, &b)
   397  
   398  			} else if resp.StatusCode == http.StatusBadRequest {
   399  				log.Debug("Reach max number peers in the cluster")
   400  				decoder := json.NewDecoder(resp.Body)
   401  				err := &etcdErr.Error{}
   402  				decoder.Decode(err)
   403  				return *err
   404  			} else {
   405  				return fmt.Errorf("Unable to join")
   406  			}
   407  		}
   408  
   409  	}
   410  }
   411  
   412  func (s *PeerServer) Stats() []byte {
   413  	s.serverStats.LeaderInfo.Uptime = time.Now().Sub(s.serverStats.LeaderInfo.startTime).String()
   414  
   415  	// TODO: register state listener to raft to change this field
   416  	// rather than compare the state each time Stats() is called.
   417  	if s.RaftServer().State() == raft.Leader {
   418  		s.serverStats.LeaderInfo.Name = s.RaftServer().Name()
   419  	}
   420  
   421  	queue := s.serverStats.sendRateQueue
   422  
   423  	s.serverStats.SendingPkgRate, s.serverStats.SendingBandwidthRate = queue.Rate()
   424  
   425  	queue = s.serverStats.recvRateQueue
   426  
   427  	s.serverStats.RecvingPkgRate, s.serverStats.RecvingBandwidthRate = queue.Rate()
   428  
   429  	b, _ := json.Marshal(s.serverStats)
   430  
   431  	return b
   432  }
   433  
   434  func (s *PeerServer) PeerStats() []byte {
   435  	if s.raftServer.State() == raft.Leader {
   436  		b, _ := json.Marshal(s.followersStats)
   437  		return b
   438  	}
   439  	return nil
   440  }
   441  
   442  // raftEventLogger converts events from the Raft server into log messages.
   443  func (s *PeerServer) raftEventLogger(event raft.Event) {
   444  	value := event.Value()
   445  	prevValue := event.PrevValue()
   446  	if value == nil {
   447  		value = "<nil>"
   448  	}
   449  	if prevValue == nil {
   450  		prevValue = "<nil>"
   451  	}
   452  
   453  	switch event.Type() {
   454  	case raft.StateChangeEventType:
   455  		log.Infof("%s: state changed from '%v' to '%v'.", s.Config.Name, prevValue, value)
   456  	case raft.TermChangeEventType:
   457  		log.Infof("%s: term #%v started.", s.Config.Name, value)
   458  	case raft.LeaderChangeEventType:
   459  		log.Infof("%s: leader changed from '%v' to '%v'.", s.Config.Name, prevValue, value)
   460  	case raft.AddPeerEventType:
   461  		log.Infof("%s: peer added: '%v'", s.Config.Name, value)
   462  	case raft.RemovePeerEventType:
   463  		log.Infof("%s: peer removed: '%v'", s.Config.Name, value)
   464  	case raft.HeartbeatIntervalEventType:
   465  		var name = "<unknown>"
   466  		if peer, ok := value.(*raft.Peer); ok {
   467  			name = peer.Name
   468  		}
   469  		log.Infof("%s: warning: heartbeat timed out: '%v'", s.Config.Name, name)
   470  	case raft.ElectionTimeoutThresholdEventType:
   471  		select {
   472  		case s.timeoutThresholdChan <- value:
   473  		default:
   474  		}
   475  
   476  	}
   477  }
   478  
   479  func (s *PeerServer) recordMetricEvent(event raft.Event) {
   480  	name := fmt.Sprintf("raft.event.%s", event.Type())
   481  	value := event.Value().(time.Duration)
   482  	(*s.metrics).Timer(name).Update(value)
   483  }
   484  
   485  // logSnapshot logs about the snapshot that was taken.
   486  func (s *PeerServer) logSnapshot(err error, currentIndex, count uint64) {
   487  	info := fmt.Sprintf("%s: snapshot of %d events at index %d", s.Config.Name, count, currentIndex)
   488  
   489  	if err != nil {
   490  		log.Infof("%s attempted and failed: %v", info, err)
   491  	} else {
   492  		log.Infof("%s completed", info)
   493  	}
   494  }
   495  
   496  func (s *PeerServer) monitorSnapshot() {
   497  	for {
   498  		time.Sleep(s.snapConf.checkingInterval)
   499  		currentIndex := s.RaftServer().CommitIndex()
   500  		count := currentIndex - s.snapConf.lastIndex
   501  		if uint64(count) > s.snapConf.snapshotThr {
   502  			err := s.raftServer.TakeSnapshot()
   503  			s.logSnapshot(err, currentIndex, count)
   504  			s.snapConf.lastIndex = currentIndex
   505  		}
   506  	}
   507  }
   508  
   509  func (s *PeerServer) monitorSync() {
   510  	ticker := time.Tick(time.Millisecond * 500)
   511  	for {
   512  		select {
   513  		case now := <-ticker:
   514  			if s.raftServer.State() == raft.Leader {
   515  				s.raftServer.Do(s.store.CommandFactory().CreateSyncCommand(now))
   516  			}
   517  		}
   518  	}
   519  }
   520  
   521  // monitorTimeoutThreshold groups timeout threshold events together and prints
   522  // them as a single log line.
   523  func (s *PeerServer) monitorTimeoutThreshold(closeChan chan bool) {
   524  	for {
   525  		select {
   526  		case value := <-s.timeoutThresholdChan:
   527  			log.Infof("%s: warning: heartbeat near election timeout: %v", s.Config.Name, value)
   528  		case <-closeChan:
   529  			return
   530  		}
   531  
   532  		time.Sleep(ThresholdMonitorTimeout)
   533  	}
   534  }