github.com/alpe/etcd@v0.1.2-0.20130915230056-09f31af88aeb/raft_server.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "crypto/tls" 6 "encoding/binary" 7 "encoding/json" 8 "fmt" 9 "io/ioutil" 10 "net/http" 11 "net/url" 12 "time" 13 14 etcdErr "github.com/coreos/etcd/error" 15 "github.com/coreos/go-raft" 16 ) 17 18 type raftServer struct { 19 *raft.Server 20 version string 21 joinIndex uint64 22 name string 23 url string 24 listenHost string 25 tlsConf *TLSConfig 26 tlsInfo *TLSInfo 27 peersStats map[string]*raftPeerStats 28 serverStats *raftServerStats 29 } 30 31 var r *raftServer 32 33 func newRaftServer(name string, url string, listenHost string, tlsConf *TLSConfig, tlsInfo *TLSInfo) *raftServer { 34 35 // Create transporter for raft 36 raftTransporter := newTransporter(tlsConf.Scheme, tlsConf.Client, ElectionTimeout) 37 38 // Create raft server 39 server, err := raft.NewServer(name, dirPath, raftTransporter, etcdStore, nil) 40 41 check(err) 42 43 return &raftServer{ 44 Server: server, 45 version: raftVersion, 46 name: name, 47 url: url, 48 listenHost: listenHost, 49 tlsConf: tlsConf, 50 tlsInfo: tlsInfo, 51 peersStats: make(map[string]*raftPeerStats), 52 serverStats: &raftServerStats{ 53 StartTime: time.Now(), 54 sendRateQueue: &statsQueue{ 55 back: -1, 56 }, 57 recvRateQueue: &statsQueue{ 58 back: -1, 59 }, 60 }, 61 } 62 } 63 64 // Start the raft server 65 func (r *raftServer) ListenAndServe() { 66 67 // Setup commands. 68 registerCommands() 69 70 // LoadSnapshot 71 if snapshot { 72 err := r.LoadSnapshot() 73 74 if err == nil { 75 debugf("%s finished load snapshot", r.name) 76 } else { 77 debug(err) 78 } 79 } 80 81 r.SetElectionTimeout(ElectionTimeout) 82 r.SetHeartbeatTimeout(HeartbeatTimeout) 83 84 r.Start() 85 86 if r.IsLogEmpty() { 87 88 // start as a leader in a new cluster 89 if len(cluster) == 0 { 90 startAsLeader() 91 92 } else { 93 startAsFollower() 94 } 95 96 } else { 97 98 // rejoin the previous cluster 99 cluster = getMachines(nameToRaftURL) 100 for i := 0; i < len(cluster); i++ { 101 u, err := url.Parse(cluster[i]) 102 if err != nil { 103 debug("rejoin cannot parse url: ", err) 104 } 105 cluster[i] = u.Host 106 } 107 ok := joinCluster(cluster) 108 if !ok { 109 warn("the entire cluster is down! this machine will restart the cluster.") 110 } 111 112 debugf("%s restart as a follower", r.name) 113 } 114 115 // open the snapshot 116 if snapshot { 117 go monitorSnapshot() 118 } 119 120 // start to response to raft requests 121 go r.startTransport(r.tlsConf.Scheme, r.tlsConf.Server) 122 123 } 124 125 func startAsLeader() { 126 // leader need to join self as a peer 127 for { 128 _, err := r.Do(newJoinCommand()) 129 if err == nil { 130 break 131 } 132 } 133 debugf("%s start as a leader", r.name) 134 } 135 136 func startAsFollower() { 137 // start as a follower in a existing cluster 138 for i := 0; i < retryTimes; i++ { 139 ok := joinCluster(cluster) 140 if ok { 141 return 142 } 143 warnf("cannot join to cluster via given machines, retry in %d seconds", RetryInterval) 144 time.Sleep(time.Second * RetryInterval) 145 } 146 147 fatalf("Cannot join the cluster via given machines after %x retries", retryTimes) 148 } 149 150 // Start to listen and response raft command 151 func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) { 152 infof("raft server [name %s, listen on %s, advertised url %s]", r.name, r.listenHost, r.url) 153 154 raftMux := http.NewServeMux() 155 156 server := &http.Server{ 157 Handler: raftMux, 158 TLSConfig: &tlsConf, 159 Addr: r.listenHost, 160 } 161 162 // internal commands 163 raftMux.HandleFunc("/name", NameHttpHandler) 164 raftMux.HandleFunc("/version", RaftVersionHttpHandler) 165 raftMux.Handle("/join", errorHandler(JoinHttpHandler)) 166 raftMux.HandleFunc("/remove/", RemoveHttpHandler) 167 raftMux.HandleFunc("/vote", VoteHttpHandler) 168 raftMux.HandleFunc("/log", GetLogHttpHandler) 169 raftMux.HandleFunc("/log/append", AppendEntriesHttpHandler) 170 raftMux.HandleFunc("/snapshot", SnapshotHttpHandler) 171 raftMux.HandleFunc("/snapshotRecovery", SnapshotRecoveryHttpHandler) 172 raftMux.HandleFunc("/etcdURL", EtcdURLHttpHandler) 173 174 if scheme == "http" { 175 fatal(server.ListenAndServe()) 176 } else { 177 fatal(server.ListenAndServeTLS(r.tlsInfo.CertFile, r.tlsInfo.KeyFile)) 178 } 179 180 } 181 182 // getVersion fetches the raft version of a peer. This works for now but we 183 // will need to do something more sophisticated later when we allow mixed 184 // version clusters. 185 func getVersion(t *transporter, versionURL url.URL) (string, error) { 186 resp, err := t.Get(versionURL.String()) 187 188 if err != nil { 189 return "", err 190 } 191 192 defer resp.Body.Close() 193 body, err := ioutil.ReadAll(resp.Body) 194 195 return string(body), nil 196 } 197 198 func joinCluster(cluster []string) bool { 199 for _, machine := range cluster { 200 201 if len(machine) == 0 { 202 continue 203 } 204 205 err := joinByMachine(r.Server, machine, r.tlsConf.Scheme) 206 if err == nil { 207 debugf("%s success join to the cluster via machine %s", r.name, machine) 208 return true 209 210 } else { 211 if _, ok := err.(etcdErr.Error); ok { 212 fatal(err) 213 } 214 215 debugf("cannot join to cluster via machine %s %s", machine, err) 216 } 217 } 218 return false 219 } 220 221 // Send join requests to machine. 222 func joinByMachine(s *raft.Server, machine string, scheme string) error { 223 var b bytes.Buffer 224 225 // t must be ok 226 t, _ := r.Transporter().(*transporter) 227 228 // Our version must match the leaders version 229 versionURL := url.URL{Host: machine, Scheme: scheme, Path: "/version"} 230 version, err := getVersion(t, versionURL) 231 if err != nil { 232 return fmt.Errorf("Unable to join: %v", err) 233 } 234 235 // TODO: versioning of the internal protocol. See: 236 // Documentation/internatl-protocol-versioning.md 237 if version != r.version { 238 return fmt.Errorf("Unable to join: internal version mismatch, entire cluster must be running identical versions of etcd") 239 } 240 241 json.NewEncoder(&b).Encode(newJoinCommand()) 242 243 joinURL := url.URL{Host: machine, Scheme: scheme, Path: "/join"} 244 245 debugf("Send Join Request to %s", joinURL.String()) 246 247 resp, err := t.Post(joinURL.String(), &b) 248 249 for { 250 if err != nil { 251 return fmt.Errorf("Unable to join: %v", err) 252 } 253 if resp != nil { 254 defer resp.Body.Close() 255 if resp.StatusCode == http.StatusOK { 256 b, _ := ioutil.ReadAll(resp.Body) 257 r.joinIndex, _ = binary.Uvarint(b) 258 return nil 259 } 260 if resp.StatusCode == http.StatusTemporaryRedirect { 261 262 address := resp.Header.Get("Location") 263 debugf("Send Join Request to %s", address) 264 265 json.NewEncoder(&b).Encode(newJoinCommand()) 266 267 resp, err = t.Post(address, &b) 268 269 } else if resp.StatusCode == http.StatusBadRequest { 270 debug("Reach max number machines in the cluster") 271 decoder := json.NewDecoder(resp.Body) 272 err := &etcdErr.Error{} 273 decoder.Decode(err) 274 return *err 275 } else { 276 return fmt.Errorf("Unable to join") 277 } 278 } 279 280 } 281 return fmt.Errorf("Unable to join: %v", err) 282 } 283 284 func (r *raftServer) Stats() []byte { 285 r.serverStats.LeaderUptime = time.Now().Sub(r.serverStats.leaderStartTime).String() 286 287 queue := r.serverStats.sendRateQueue 288 289 r.serverStats.SendingPkgRate, r.serverStats.SendingBandwidthRate = queue.Rate() 290 291 queue = r.serverStats.recvRateQueue 292 293 r.serverStats.RecvingPkgRate, r.serverStats.RecvingBandwidthRate = queue.Rate() 294 295 sBytes, err := json.Marshal(r.serverStats) 296 297 if err != nil { 298 warn(err) 299 } 300 301 if r.State() == raft.Leader { 302 pBytes, _ := json.Marshal(r.peersStats) 303 304 b := append(sBytes, pBytes...) 305 return b 306 } 307 308 return sBytes 309 } 310 311 // Register commands to raft server 312 func registerCommands() { 313 raft.RegisterCommand(&JoinCommand{}) 314 raft.RegisterCommand(&RemoveCommand{}) 315 raft.RegisterCommand(&SetCommand{}) 316 raft.RegisterCommand(&GetCommand{}) 317 raft.RegisterCommand(&DeleteCommand{}) 318 raft.RegisterCommand(&WatchCommand{}) 319 raft.RegisterCommand(&TestAndSetCommand{}) 320 }