github.com/marineam/etcd@v0.1.2-0.20130821182615-9b7109b46686/raft_server.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "crypto/tls" 6 "encoding/binary" 7 "encoding/json" 8 "fmt" 9 etcdErr "github.com/coreos/etcd/error" 10 "github.com/coreos/go-raft" 11 "io/ioutil" 12 "net/http" 13 "net/url" 14 "time" 15 ) 16 17 type raftServer struct { 18 *raft.Server 19 version string 20 joinIndex uint64 21 name string 22 url string 23 tlsConf *TLSConfig 24 tlsInfo *TLSInfo 25 } 26 27 var r *raftServer 28 29 func newRaftServer(name string, url string, tlsConf *TLSConfig, tlsInfo *TLSInfo) *raftServer { 30 31 // Create transporter for raft 32 raftTransporter := newTransporter(tlsConf.Scheme, tlsConf.Client) 33 34 // Create raft server 35 server, err := raft.NewServer(name, dirPath, raftTransporter, etcdStore, nil) 36 37 check(err) 38 39 return &raftServer{ 40 Server: server, 41 version: raftVersion, 42 name: name, 43 url: url, 44 tlsConf: tlsConf, 45 tlsInfo: tlsInfo, 46 } 47 } 48 49 // Start the raft server 50 func (r *raftServer) ListenAndServe() { 51 52 // Setup commands. 53 registerCommands() 54 55 // LoadSnapshot 56 if snapshot { 57 err := r.LoadSnapshot() 58 59 if err == nil { 60 debugf("%s finished load snapshot", r.name) 61 } else { 62 debug(err) 63 } 64 } 65 66 r.SetElectionTimeout(ElectionTimeout) 67 r.SetHeartbeatTimeout(HeartbeatTimeout) 68 69 r.Start() 70 71 if r.IsLogEmpty() { 72 73 // start as a leader in a new cluster 74 if len(cluster) == 0 { 75 startAsLeader() 76 77 } else { 78 startAsFollower() 79 } 80 81 } else { 82 83 // rejoin the previous cluster 84 cluster = getMachines(nameToRaftURL) 85 for i := 0; i < len(cluster); i++ { 86 u, err := url.Parse(cluster[i]) 87 if err != nil { 88 debug("rejoin cannot parse url: ", err) 89 } 90 cluster[i] = u.Host 91 } 92 ok := joinCluster(cluster) 93 if !ok { 94 warn("the whole cluster dies! restart the cluster") 95 } 96 97 debugf("%s restart as a follower", r.name) 98 } 99 100 // open the snapshot 101 if snapshot { 102 go monitorSnapshot() 103 } 104 105 // start to response to raft requests 106 go r.startTransport(r.tlsConf.Scheme, r.tlsConf.Server) 107 108 } 109 110 func startAsLeader() { 111 // leader need to join self as a peer 112 for { 113 _, err := r.Do(newJoinCommand()) 114 if err == nil { 115 break 116 } 117 } 118 debugf("%s start as a leader", r.name) 119 } 120 121 func startAsFollower() { 122 // start as a follower in a existing cluster 123 for i := 0; i < retryTimes; i++ { 124 ok := joinCluster(cluster) 125 if ok { 126 return 127 } 128 warnf("cannot join to cluster via given machines, retry in %d seconds", RetryInterval) 129 time.Sleep(time.Second * RetryInterval) 130 } 131 132 fatalf("Cannot join the cluster via given machines after %x retries", retryTimes) 133 } 134 135 // Start to listen and response raft command 136 func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) { 137 u, _ := url.Parse(r.url) 138 infof("raft server [%s:%s]", r.name, u) 139 140 raftMux := http.NewServeMux() 141 142 server := &http.Server{ 143 Handler: raftMux, 144 TLSConfig: &tlsConf, 145 Addr: u.Host, 146 } 147 148 // internal commands 149 raftMux.HandleFunc("/name", NameHttpHandler) 150 raftMux.HandleFunc("/version", RaftVersionHttpHandler) 151 raftMux.Handle("/join", errorHandler(JoinHttpHandler)) 152 raftMux.HandleFunc("/remove/", RemoveHttpHandler) 153 raftMux.HandleFunc("/vote", VoteHttpHandler) 154 raftMux.HandleFunc("/log", GetLogHttpHandler) 155 raftMux.HandleFunc("/log/append", AppendEntriesHttpHandler) 156 raftMux.HandleFunc("/snapshot", SnapshotHttpHandler) 157 raftMux.HandleFunc("/snapshotRecovery", SnapshotRecoveryHttpHandler) 158 raftMux.HandleFunc("/etcdURL", EtcdURLHttpHandler) 159 160 if scheme == "http" { 161 fatal(server.ListenAndServe()) 162 } else { 163 fatal(server.ListenAndServeTLS(r.tlsInfo.CertFile, r.tlsInfo.KeyFile)) 164 } 165 166 } 167 168 // getVersion fetches the raft version of a peer. This works for now but we 169 // will need to do something more sophisticated later when we allow mixed 170 // version clusters. 171 func getVersion(t transporter, versionURL url.URL) (string, error) { 172 resp, err := t.Get(versionURL.String()) 173 174 if err != nil { 175 return "", err 176 } 177 178 defer resp.Body.Close() 179 body, err := ioutil.ReadAll(resp.Body) 180 181 return string(body), nil 182 } 183 184 func joinCluster(cluster []string) bool { 185 for _, machine := range cluster { 186 187 if len(machine) == 0 { 188 continue 189 } 190 191 err := joinByMachine(r.Server, machine, r.tlsConf.Scheme) 192 if err == nil { 193 debugf("%s success join to the cluster via machine %s", r.name, machine) 194 return true 195 196 } else { 197 if _, ok := err.(etcdErr.Error); ok { 198 fatal(err) 199 } 200 debugf("cannot join to cluster via machine %s %s", machine, err) 201 } 202 } 203 return false 204 } 205 206 // Send join requests to machine. 207 func joinByMachine(s *raft.Server, machine string, scheme string) error { 208 var b bytes.Buffer 209 210 // t must be ok 211 t, _ := r.Transporter().(transporter) 212 213 // Our version must match the leaders version 214 versionURL := url.URL{Host: machine, Scheme: scheme, Path: "/version"} 215 version, err := getVersion(t, versionURL) 216 if err != nil { 217 return fmt.Errorf("Unable to join: %v", err) 218 } 219 220 // TODO: versioning of the internal protocol. See: 221 // Documentation/internatl-protocol-versioning.md 222 if version != r.version { 223 return fmt.Errorf("Unable to join: internal version mismatch, entire cluster must be running identical versions of etcd") 224 } 225 226 json.NewEncoder(&b).Encode(newJoinCommand()) 227 228 joinURL := url.URL{Host: machine, Scheme: scheme, Path: "/join"} 229 230 debugf("Send Join Request to %s", joinURL.String()) 231 232 resp, err := t.Post(joinURL.String(), &b) 233 234 for { 235 if err != nil { 236 return fmt.Errorf("Unable to join: %v", err) 237 } 238 if resp != nil { 239 defer resp.Body.Close() 240 if resp.StatusCode == http.StatusOK { 241 b, _ := ioutil.ReadAll(resp.Body) 242 r.joinIndex, _ = binary.Uvarint(b) 243 return nil 244 } 245 if resp.StatusCode == http.StatusTemporaryRedirect { 246 247 address := resp.Header.Get("Location") 248 debugf("Send Join Request to %s", address) 249 250 json.NewEncoder(&b).Encode(newJoinCommand()) 251 252 resp, err = t.Post(address, &b) 253 254 } else if resp.StatusCode == http.StatusBadRequest { 255 debug("Reach max number machines in the cluster") 256 decoder := json.NewDecoder(resp.Body) 257 err := &etcdErr.Error{} 258 decoder.Decode(err) 259 return *err 260 } else { 261 return fmt.Errorf("Unable to join") 262 } 263 } 264 265 } 266 return fmt.Errorf("Unable to join: %v", err) 267 } 268 269 // Register commands to raft server 270 func registerCommands() { 271 raft.RegisterCommand(&JoinCommand{}) 272 raft.RegisterCommand(&RemoveCommand{}) 273 raft.RegisterCommand(&SetCommand{}) 274 raft.RegisterCommand(&GetCommand{}) 275 raft.RegisterCommand(&DeleteCommand{}) 276 raft.RegisterCommand(&WatchCommand{}) 277 raft.RegisterCommand(&TestAndSetCommand{}) 278 }