github.com/qorio/etcd@v0.1.2-0.20131003183127-5cc585af9618/raft_server.go (about)

     1  package main
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/tls"
     6  	"encoding/binary"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"net/url"
    12  	"time"
    13  
    14  	etcdErr "github.com/coreos/etcd/error"
    15  	"github.com/coreos/go-raft"
    16  )
    17  
// raftServer bundles the embedded go-raft server with the naming, TLS and
// statistics state that the functions in this file need to run it.
type raftServer struct {
	// Embedded so the raft server's methods can be called directly on raftServer.
	*raft.Server
	version        string              // set from raftVersion at construction; compared against a peer's /version reply when joining
	joinIndex      uint64              // decoded from the body of a successful /join response
	name           string              // name handed to raft.NewServer; also used in log messages
	url            string              // logged as the "advertised url" when the transport starts
	listenHost     string              // address the raft HTTP server listens on
	tlsConf        *TLSConfig          // supplies the scheme and the client/server TLS configuration
	tlsInfo        *TLSInfo            // supplies the cert and key file paths used for TLS serving
	followersStats *raftFollowersStats // marshaled to JSON by PeerStats
	serverStats    *raftServerStats    // updated and marshaled to JSON by Stats
}
    30  
// r is the package-level raft server used by the free functions in this
// file (startAsLeader, joinCluster, joinByMachine). It is not assigned here.
var r *raftServer
    32  
    33  func newRaftServer(name string, url string, listenHost string, tlsConf *TLSConfig, tlsInfo *TLSInfo) *raftServer {
    34  
    35  	// Create transporter for raft
    36  	raftTransporter := newTransporter(tlsConf.Scheme, tlsConf.Client)
    37  
    38  	// Create raft server
    39  	server, err := raft.NewServer(name, dirPath, raftTransporter, etcdStore, nil, "")
    40  
    41  	check(err)
    42  
    43  	return &raftServer{
    44  		Server:     server,
    45  		version:    raftVersion,
    46  		name:       name,
    47  		url:        url,
    48  		listenHost: listenHost,
    49  		tlsConf:    tlsConf,
    50  		tlsInfo:    tlsInfo,
    51  		followersStats: &raftFollowersStats{
    52  			Leader:    name,
    53  			Followers: make(map[string]*raftFollowerStats),
    54  		},
    55  		serverStats: &raftServerStats{
    56  			StartTime: time.Now(),
    57  			sendRateQueue: &statsQueue{
    58  				back: -1,
    59  			},
    60  			recvRateQueue: &statsQueue{
    61  				back: -1,
    62  			},
    63  		},
    64  	}
    65  }
    66  
    67  // Start the raft server
    68  func (r *raftServer) ListenAndServe() {
    69  	// Setup commands.
    70  	registerCommands()
    71  
    72  	// LoadSnapshot
    73  	if snapshot {
    74  		err := r.LoadSnapshot()
    75  
    76  		if err == nil {
    77  			debugf("%s finished load snapshot", r.name)
    78  		} else {
    79  			debug(err)
    80  		}
    81  	}
    82  
    83  	r.SetElectionTimeout(ElectionTimeout)
    84  	r.SetHeartbeatTimeout(HeartbeatTimeout)
    85  
    86  	r.Start()
    87  
    88  	if r.IsLogEmpty() {
    89  
    90  		// start as a leader in a new cluster
    91  		if len(cluster) == 0 {
    92  			startAsLeader()
    93  
    94  		} else {
    95  			startAsFollower()
    96  		}
    97  
    98  	} else {
    99  
   100  		// rejoin the previous cluster
   101  		cluster = getMachines(nameToRaftURL)
   102  		for i := 0; i < len(cluster); i++ {
   103  			u, err := url.Parse(cluster[i])
   104  			if err != nil {
   105  				debug("rejoin cannot parse url: ", err)
   106  			}
   107  			cluster[i] = u.Host
   108  		}
   109  		ok := joinCluster(cluster)
   110  		if !ok {
   111  			warn("the entire cluster is down! this machine will restart the cluster.")
   112  		}
   113  
   114  		debugf("%s restart as a follower", r.name)
   115  	}
   116  
   117  	// open the snapshot
   118  	if snapshot {
   119  		go monitorSnapshot()
   120  	}
   121  
   122  	// start to response to raft requests
   123  	go r.startTransport(r.tlsConf.Scheme, r.tlsConf.Server)
   124  
   125  }
   126  
   127  func startAsLeader() {
   128  	// leader need to join self as a peer
   129  	for {
   130  		_, err := r.Do(newJoinCommand())
   131  		if err == nil {
   132  			break
   133  		}
   134  	}
   135  	debugf("%s start as a leader", r.name)
   136  }
   137  
   138  func startAsFollower() {
   139  	// start as a follower in a existing cluster
   140  	for i := 0; i < retryTimes; i++ {
   141  		ok := joinCluster(cluster)
   142  		if ok {
   143  			return
   144  		}
   145  		warnf("cannot join to cluster via given machines, retry in %d seconds", RetryInterval)
   146  		time.Sleep(time.Second * RetryInterval)
   147  	}
   148  
   149  	fatalf("Cannot join the cluster via given machines after %x retries", retryTimes)
   150  }
   151  
   152  // Start to listen and response raft command
   153  func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) {
   154  	infof("raft server [name %s, listen on %s, advertised url %s]", r.name, r.listenHost, r.url)
   155  
   156  	raftMux := http.NewServeMux()
   157  
   158  	server := &http.Server{
   159  		Handler:   raftMux,
   160  		TLSConfig: &tlsConf,
   161  		Addr:      r.listenHost,
   162  	}
   163  
   164  	// internal commands
   165  	raftMux.HandleFunc("/name", NameHttpHandler)
   166  	raftMux.HandleFunc("/version", RaftVersionHttpHandler)
   167  	raftMux.Handle("/join", errorHandler(JoinHttpHandler))
   168  	raftMux.HandleFunc("/remove/", RemoveHttpHandler)
   169  	raftMux.HandleFunc("/vote", VoteHttpHandler)
   170  	raftMux.HandleFunc("/log", GetLogHttpHandler)
   171  	raftMux.HandleFunc("/log/append", AppendEntriesHttpHandler)
   172  	raftMux.HandleFunc("/snapshot", SnapshotHttpHandler)
   173  	raftMux.HandleFunc("/snapshotRecovery", SnapshotRecoveryHttpHandler)
   174  	raftMux.HandleFunc("/etcdURL", EtcdURLHttpHandler)
   175  
   176  	if scheme == "http" {
   177  		fatal(server.ListenAndServe())
   178  	} else {
   179  		fatal(server.ListenAndServeTLS(r.tlsInfo.CertFile, r.tlsInfo.KeyFile))
   180  	}
   181  
   182  }
   183  
   184  // getVersion fetches the raft version of a peer. This works for now but we
   185  // will need to do something more sophisticated later when we allow mixed
   186  // version clusters.
   187  func getVersion(t *transporter, versionURL url.URL) (string, error) {
   188  	resp, req, err := t.Get(versionURL.String())
   189  
   190  	if err != nil {
   191  		return "", err
   192  	}
   193  
   194  	defer resp.Body.Close()
   195  
   196  	t.CancelWhenTimeout(req)
   197  
   198  	body, err := ioutil.ReadAll(resp.Body)
   199  
   200  	return string(body), nil
   201  }
   202  
   203  func joinCluster(cluster []string) bool {
   204  	for _, machine := range cluster {
   205  
   206  		if len(machine) == 0 {
   207  			continue
   208  		}
   209  
   210  		err := joinByMachine(r.Server, machine, r.tlsConf.Scheme)
   211  		if err == nil {
   212  			debugf("%s success join to the cluster via machine %s", r.name, machine)
   213  			return true
   214  
   215  		} else {
   216  			if _, ok := err.(etcdErr.Error); ok {
   217  				fatal(err)
   218  			}
   219  
   220  			debugf("cannot join to cluster via machine %s %s", machine, err)
   221  		}
   222  	}
   223  	return false
   224  }
   225  
   226  // Send join requests to machine.
   227  func joinByMachine(s *raft.Server, machine string, scheme string) error {
   228  	var b bytes.Buffer
   229  
   230  	// t must be ok
   231  	t, _ := r.Transporter().(*transporter)
   232  
   233  	// Our version must match the leaders version
   234  	versionURL := url.URL{Host: machine, Scheme: scheme, Path: "/version"}
   235  	version, err := getVersion(t, versionURL)
   236  	if err != nil {
   237  		return fmt.Errorf("Unable to join: %v", err)
   238  	}
   239  
   240  	// TODO: versioning of the internal protocol. See:
   241  	// Documentation/internatl-protocol-versioning.md
   242  	if version != r.version {
   243  		return fmt.Errorf("Unable to join: internal version mismatch, entire cluster must be running identical versions of etcd")
   244  	}
   245  
   246  	json.NewEncoder(&b).Encode(newJoinCommand())
   247  
   248  	joinURL := url.URL{Host: machine, Scheme: scheme, Path: "/join"}
   249  
   250  	debugf("Send Join Request to %s", joinURL.String())
   251  
   252  	resp, req, err := t.Post(joinURL.String(), &b)
   253  
   254  	for {
   255  		if err != nil {
   256  			return fmt.Errorf("Unable to join: %v", err)
   257  		}
   258  		if resp != nil {
   259  			defer resp.Body.Close()
   260  
   261  			t.CancelWhenTimeout(req)
   262  
   263  			if resp.StatusCode == http.StatusOK {
   264  				b, _ := ioutil.ReadAll(resp.Body)
   265  				r.joinIndex, _ = binary.Uvarint(b)
   266  				return nil
   267  			}
   268  			if resp.StatusCode == http.StatusTemporaryRedirect {
   269  
   270  				address := resp.Header.Get("Location")
   271  				debugf("Send Join Request to %s", address)
   272  
   273  				json.NewEncoder(&b).Encode(newJoinCommand())
   274  
   275  				resp, req, err = t.Post(address, &b)
   276  
   277  			} else if resp.StatusCode == http.StatusBadRequest {
   278  				debug("Reach max number machines in the cluster")
   279  				decoder := json.NewDecoder(resp.Body)
   280  				err := &etcdErr.Error{}
   281  				decoder.Decode(err)
   282  				return *err
   283  			} else {
   284  				return fmt.Errorf("Unable to join")
   285  			}
   286  		}
   287  
   288  	}
   289  	return fmt.Errorf("Unable to join: %v", err)
   290  }
   291  
   292  func (r *raftServer) Stats() []byte {
   293  	r.serverStats.LeaderInfo.Uptime = time.Now().Sub(r.serverStats.LeaderInfo.startTime).String()
   294  
   295  	queue := r.serverStats.sendRateQueue
   296  
   297  	r.serverStats.SendingPkgRate, r.serverStats.SendingBandwidthRate = queue.Rate()
   298  
   299  	queue = r.serverStats.recvRateQueue
   300  
   301  	r.serverStats.RecvingPkgRate, r.serverStats.RecvingBandwidthRate = queue.Rate()
   302  
   303  	b, _ := json.Marshal(r.serverStats)
   304  
   305  	return b
   306  }
   307  
   308  func (r *raftServer) PeerStats() []byte {
   309  	if r.State() == raft.Leader {
   310  		b, _ := json.Marshal(r.followersStats)
   311  		return b
   312  	}
   313  	return nil
   314  }
   315  
   316  // Register commands to raft server
   317  func registerCommands() {
   318  	raft.RegisterCommand(&JoinCommand{})
   319  	raft.RegisterCommand(&RemoveCommand{})
   320  	raft.RegisterCommand(&SetCommand{})
   321  	raft.RegisterCommand(&GetCommand{})
   322  	raft.RegisterCommand(&DeleteCommand{})
   323  	raft.RegisterCommand(&WatchCommand{})
   324  	raft.RegisterCommand(&TestAndSetCommand{})
   325  }