github.com/jiasir/deis@v1.12.2/builder/etcd/etcd.go (about) 1 /*Package etcd is a library for performing common Etcd tasks. 2 */ 3 package etcd 4 5 import ( 6 "errors" 7 "fmt" 8 "io/ioutil" 9 "net/http" 10 "os" 11 "os/exec" 12 "strings" 13 "time" 14 15 "github.com/Masterminds/cookoo" 16 "github.com/Masterminds/cookoo/log" 17 "github.com/Masterminds/cookoo/safely" 18 "github.com/coreos/go-etcd/etcd" 19 ) 20 21 var ( 22 retryCycles = 2 23 retrySleep = 200 * time.Millisecond 24 ) 25 26 // Getter describes the Get behavior of an Etcd client. 27 // 28 // Usually you will want to use go-etcd/etcd.Client to satisfy this. 29 // 30 // We use an interface because it is more testable. 31 type Getter interface { 32 Get(string, bool, bool) (*etcd.Response, error) 33 } 34 35 // DirCreator describes etcd's CreateDir behavior. 36 // 37 // Usually you will want to use go-etcd/etcd.Client to satisfy this. 38 type DirCreator interface { 39 CreateDir(string, uint64) (*etcd.Response, error) 40 } 41 42 // Watcher watches an etcd entry. 43 type Watcher interface { 44 Watch(string, uint64, bool, chan *etcd.Response, chan bool) (*etcd.Response, error) 45 } 46 47 // Setter sets a value in Etcd. 48 type Setter interface { 49 Set(string, string, uint64) (*etcd.Response, error) 50 } 51 52 // GetterSetter performs get and set operations. 53 type GetterSetter interface { 54 Getter 55 Setter 56 } 57 58 // CreateClient creates a new Etcd client and prepares it for work. 59 // 60 // Params: 61 // - url (string): A server to connect to. 62 // - retries (int): Number of times to retry a connection to the server 63 // - retrySleep (time.Duration): How long to sleep between retries 64 // 65 // Returns: 66 // This puts an *etcd.Client into the context. 67 func CreateClient(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 68 url := p.Get("url", "http://localhost:4001").(string) 69 70 // Backed this out because it's unnecessary so far. 71 //hosts := p.Get("urls", []string{"http://localhost:4001"}).([]string) 72 hosts := []string{url} 73 retryCycles = p.Get("retries", retryCycles).(int) 74 retrySleep = p.Get("retrySleep", retrySleep).(time.Duration) 75 76 // Support `host:port` format, too. 77 for i, host := range hosts { 78 if !strings.Contains(host, "://") { 79 hosts[i] = "http://" + host 80 } 81 } 82 83 client := etcd.NewClient(hosts) 84 client.CheckRetry = checkRetry 85 86 return client, nil 87 } 88 89 // Get performs an etcd Get operation. 90 // 91 // Params: 92 // - client (EtcdGetter): Etcd client 93 // - path (string): The path/key to fetch 94 // 95 // Returns: 96 // - This puts an `etcd.Response` into the context, and returns an error 97 // if the client could not connect. 98 func Get(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 99 cli, ok := p.Has("client") 100 if !ok { 101 return nil, errors.New("No Etcd client found.") 102 } 103 client := cli.(Getter) 104 path := p.Get("path", "/").(string) 105 106 res, err := client.Get(path, false, false) 107 if err != nil { 108 return res, err 109 } 110 111 if !res.Node.Dir { 112 return res, fmt.Errorf("Expected / to be a dir.") 113 } 114 return res, nil 115 } 116 117 // IsRunning checks to see if etcd is running. 118 // 119 // It will test `count` times before giving up. 120 // 121 // Params: 122 // - client (EtcdGetter) 123 // - count (int): Number of times to try before giving up. 124 // 125 // Returns: 126 // boolean true if etcd is listening. 127 func IsRunning(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 128 client := p.Get("client", nil).(Getter) 129 count := p.Get("count", 20).(int) 130 for i := 0; i < count; i++ { 131 _, err := client.Get("/", false, false) 132 if err == nil { 133 return true, nil 134 } 135 log.Infof(c, "Waiting for etcd to come online.") 136 time.Sleep(250 * time.Millisecond) 137 } 138 log.Errf(c, "Etcd is not answering after %d attempts.", count) 139 return false, &cookoo.FatalError{"Could not connect to Etcd."} 140 } 141 142 // Set sets a value in etcd. 143 // 144 // Params: 145 // - key (string): The key 146 // - value (string): The value 147 // - ttl (uint64): Time to live 148 // - client (EtcdGetter): Client, usually an *etcd.Client. 149 // 150 // Returns: 151 // - *etcd.Result 152 func Set(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 153 key := p.Get("key", "").(string) 154 value := p.Get("value", "").(string) 155 ttl := p.Get("ttl", uint64(20)).(uint64) 156 client := p.Get("client", nil).(Setter) 157 158 res, err := client.Set(key, value, ttl) 159 if err != nil { 160 log.Infof(c, "Failed to set %s=%s", key, value) 161 return res, err 162 } 163 164 return res, nil 165 } 166 167 // FindSSHUser finds an SSH user by public key. 168 // 169 // Some parts of the system require that we know not only the SSH key, but also 170 // the name of the user. That information is stored in etcd. 171 // 172 // Params: 173 // - client (EtcdGetter) 174 // - fingerprint (string): The fingerprint of the SSH key. 175 // 176 // Returns: 177 // - username (string) 178 func FindSSHUser(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 179 client := p.Get("client", nil).(Getter) 180 fingerprint := p.Get("fingerprint", nil).(string) 181 182 res, err := client.Get("/deis/builder/users", false, true) 183 if err != nil { 184 log.Warnf(c, "Error querying etcd: %s", err) 185 return "", err 186 } else if res.Node == nil || !res.Node.Dir { 187 log.Warnf(c, "No users found in etcd.") 188 return "", errors.New("Users not found") 189 } 190 for _, user := range res.Node.Nodes { 191 log.Infof(c, "Checking user %s", user.Key) 192 for _, keyprint := range user.Nodes { 193 if strings.HasSuffix(keyprint.Key, fingerprint) { 194 parts := strings.Split(user.Key, "/") 195 username := parts[len(parts)-1] 196 log.Infof(c, "Found user %s for fingerprint %s", username, fingerprint) 197 return username, nil 198 } 199 } 200 } 201 202 return "", fmt.Errorf("User not found for fingerprint %s", fingerprint) 203 } 204 205 // StoreHostKeys stores SSH hostkeys locally. 206 // 207 // First it tries to fetch them from etcd. If the keys are not present there, 208 // it generates new ones and then puts them into etcd. 209 // 210 // Params: 211 // - client(EtcdGetterSetter) 212 // - ciphers([]string): A list of ciphers to generate. Defaults are dsa, 213 // ecdsa, ed25519 and rsa. 214 // - basepath (string): Base path in etcd (ETCD_PATH). 215 // Returns: 216 // 217 func StoreHostKeys(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 218 defaultCiphers := []string{"rsa", "dsa", "ecdsa", "ed25519"} 219 client := p.Get("client", nil).(GetterSetter) 220 ciphers := p.Get("ciphers", defaultCiphers).([]string) 221 basepath := p.Get("basepath", "/deis/builder").(string) 222 223 res, err := client.Get("sshHostKey", false, false) 224 if err != nil || res.Node == nil { 225 log.Infof(c, "Could not get SSH host key from etcd. Generating new ones.") 226 if err := genSSHKeys(c); err != nil { 227 log.Err(c, "Failed to generate SSH keys. Aborting.") 228 return nil, err 229 } 230 if err := keysToEtcd(c, client, ciphers, basepath); err != nil { 231 return nil, err 232 } 233 } else if err := keysToLocal(c, client, ciphers, basepath); err != nil { 234 log.Infof(c, "Fetching SSH host keys from etcd.") 235 return nil, err 236 } 237 238 return nil, nil 239 } 240 241 // keysToLocal copies SSH host keys from etcd to the local file system. 242 // 243 // This only fails if the main key, sshHostKey cannot be stored or retrieved. 244 func keysToLocal(c cookoo.Context, client Getter, ciphers []string, etcdPath string) error { 245 lpath := "/etc/ssh/ssh_host_%s_key" 246 privkey := "%s/sshHost%sKey" 247 for _, cipher := range ciphers { 248 path := fmt.Sprintf(lpath, cipher) 249 key := fmt.Sprintf(privkey, etcdPath, cipher) 250 res, err := client.Get(key, false, false) 251 if err != nil || res.Node == nil { 252 continue 253 } 254 255 content := res.Node.Value 256 if err := ioutil.WriteFile(path, []byte(content), 0600); err != nil { 257 log.Errf(c, "Error writing ssh host key file: %s", err) 258 } 259 } 260 261 // Now get generic key. 262 res, err := client.Get("sshHostKey", false, false) 263 if err != nil || res.Node == nil { 264 return fmt.Errorf("Failed to get sshHostKey from etcd. %v", err) 265 } 266 267 content := res.Node.Value 268 if err := ioutil.WriteFile("/etc/ssh/ssh_host_key", []byte(content), 0600); err != nil { 269 log.Errf(c, "Error writing ssh host key file: %s", err) 270 return err 271 } 272 return nil 273 } 274 275 // keysToEtcd copies local keys into etcd. 276 // 277 // It only fails if it cannot copy ssh_host_key to sshHostKey. All other 278 // abnormal conditions are logged, but not considered to be failures. 279 func keysToEtcd(c cookoo.Context, client Setter, ciphers []string, etcdPath string) error { 280 lpath := "/etc/ssh/ssh_host_%s_key" 281 privkey := "%s/sshHost%sKey" 282 for _, cipher := range ciphers { 283 path := fmt.Sprintf(lpath, cipher) 284 key := fmt.Sprintf(privkey, etcdPath, cipher) 285 content, err := ioutil.ReadFile(path) 286 if err != nil { 287 log.Infof(c, "No key named %s", path) 288 } else if _, err := client.Set(key, string(content), 0); err != nil { 289 log.Errf(c, "Could not store ssh key in etcd: %s", err) 290 } 291 } 292 // Now we set the generic key: 293 if content, err := ioutil.ReadFile("/etc/ssh/ssh_host_key"); err != nil { 294 log.Errf(c, "Could not read the ssh_host_key file.") 295 return err 296 } else if _, err := client.Set("sshHostKey", string(content), 0); err != nil { 297 log.Errf(c, "Failed to set sshHostKey in etcd.") 298 return err 299 } 300 return nil 301 } 302 303 // genSshKeys generates the default set of SSH host keys. 304 func genSSHKeys(c cookoo.Context) error { 305 // Generate a new key 306 out, err := exec.Command("ssh-keygen", "-A").CombinedOutput() 307 if err != nil { 308 log.Infof(c, "ssh-keygen: %s", out) 309 log.Errf(c, "Failed to generate SSH keys: %s", err) 310 return err 311 } 312 return nil 313 } 314 315 // UpdateHostPort intermittently notifies etcd of the builder's address. 316 // 317 // If `port` is specified, this will notify etcd at 10 second intervals that 318 // the builder is listening at $HOST:$PORT, setting the TTL to 20 seconds. 319 // 320 // This will notify etcd as long as the local sshd is running. 321 // 322 // Params: 323 // - base (string): The base path to write the data: $base/host and $base/port. 324 // - host (string): The hostname 325 // - port (string): The port 326 // - client (Setter): The client to use to write the data to etcd. 327 // - sshPid (int): The PID for SSHD. If SSHD dies, this stops notifying. 328 func UpdateHostPort(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 329 base := p.Get("base", "").(string) 330 host := p.Get("host", "").(string) 331 port := p.Get("port", "").(string) 332 client := p.Get("client", nil).(Setter) 333 sshd := p.Get("sshdPid", 0).(int) 334 335 // If no port is specified, we don't do anything. 336 if len(port) == 0 { 337 log.Infof(c, "No external port provided. Not publishing details.") 338 return false, nil 339 } 340 341 var ttl uint64 = 20 342 343 if err := setHostPort(client, base, host, port, ttl); err != nil { 344 log.Errf(c, "Etcd error setting host/port: %s", err) 345 return false, err 346 } 347 348 // Update etcd every ten seconds with this builder's host/port. 349 safely.GoDo(c, func() { 350 ticker := time.NewTicker(10 * time.Second) 351 for range ticker.C { 352 if _, err := os.FindProcess(sshd); err != nil { 353 log.Errf(c, "Lost SSHd process: %s", err) 354 break 355 } else { 356 if err := setHostPort(client, base, host, port, ttl); err != nil { 357 log.Errf(c, "Etcd error setting host/port: %s", err) 358 continue 359 } 360 } 361 } 362 ticker.Stop() 363 }) 364 365 return true, nil 366 } 367 368 func setHostPort(client Setter, base, host, port string, ttl uint64) error { 369 if _, err := client.Set(base+"/host", host, ttl); err != nil { 370 return err 371 } 372 if _, err := client.Set(base+"/port", port, ttl); err != nil { 373 return err 374 } 375 return nil 376 } 377 378 // MakeDir makes a directory in Etcd. 379 // 380 // Params: 381 // - client (EtcdDirCreator): Etcd client 382 // - path (string): The name of the directory to create. 383 // - ttl (uint64): Time to live. 384 // Returns: 385 // *etcd.Response 386 func MakeDir(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 387 name := p.Get("path", "").(string) 388 ttl := p.Get("ttl", uint64(0)).(uint64) 389 cli, ok := p.Has("client") 390 if !ok { 391 return nil, errors.New("No Etcd client found.") 392 } 393 client := cli.(DirCreator) 394 395 if len(name) == 0 { 396 return false, errors.New("Expected directory name to be more than zero characters.") 397 } 398 399 res, err := client.CreateDir(name, ttl) 400 if err != nil { 401 return res, &cookoo.RecoverableError{err.Error()} 402 } 403 404 return res, nil 405 } 406 407 // Watch watches a given path, and executes a git check-repos for each event. 408 // 409 // It starts the watcher and then returns. The watcher runs on its own 410 // goroutine. To stop the watching, send the returned channel a bool. 411 // 412 // Params: 413 // - client (Watcher): An Etcd client. 414 // - path (string): The path to watch 415 // 416 // Returns: 417 // - chan bool: Send this a message to stop the watcher. 418 func Watch(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 419 // etcdctl -C $ETCD watch --recursive /deis/services 420 path := p.Get("path", "/deis/services").(string) 421 cli, ok := p.Has("client") 422 if !ok { 423 return nil, errors.New("No etcd client found.") 424 } 425 client := cli.(Watcher) 426 427 // Stupid hack because etcd watch seems to be broken, constantly complaining 428 // that the JSON it received is malformed. 429 safely.GoDo(c, func() { 430 for { 431 response, err := client.Watch(path, 0, true, nil, nil) 432 if err != nil { 433 log.Errf(c, "Etcd Watch failed: %s", err) 434 time.Sleep(50 * time.Millisecond) 435 continue 436 } 437 438 if response.Node == nil { 439 log.Infof(c, "Unexpected Etcd message: %v", response) 440 } 441 git := exec.Command("/home/git/check-repos") 442 if out, err := git.CombinedOutput(); err != nil { 443 log.Errf(c, "Failed git check-repos: %s", err) 444 log.Infof(c, "Output: %s", out) 445 } 446 } 447 448 }) 449 450 return nil, nil 451 452 /* Watch seems to be broken. So we do this stupid watch loop instead. 453 receiver := make(chan *etcd.Response) 454 stop := make(chan bool) 455 // Buffer the channels so that we don't hang waiting for go-etcd to 456 // read off the channel. 457 stopetcd := make(chan bool, 1) 458 stopwatch := make(chan bool, 1) 459 460 461 // Watch for errors. 462 safely.GoDo(c, func() { 463 // When a receiver is passed in, no *Response is ever returned. Instead, 464 // Watch acts like an error channel, and receiver gets all of the messages. 465 _, err := client.Watch(path, 0, true, receiver, stopetcd) 466 if err != nil { 467 log.Infof(c, "Watcher stopped with error '%s'", err) 468 stopwatch <- true 469 //close(stopwatch) 470 } 471 }) 472 // Watch for events 473 safely.GoDo(c, func() { 474 for { 475 select { 476 case msg := <-receiver: 477 if msg.Node != nil { 478 log.Infof(c, "Received notification %s for %s", msg.Action, msg.Node.Key) 479 } else { 480 log.Infof(c, "Received unexpected etcd message: %v", msg) 481 } 482 git := exec.Command("/home/git/check-repos") 483 if out, err := git.CombinedOutput(); err != nil { 484 log.Errf(c, "Failed git check-repos: %s", err) 485 log.Infof(c, "Output: %s", out) 486 } 487 case <-stopwatch: 488 c.Logf("debug", "Received signal to stop watching events.") 489 return 490 } 491 } 492 }) 493 // Fan out stop requests. 494 safely.GoDo(c, func() { 495 <-stop 496 stopwatch <- true 497 stopetcd <- true 498 close(stopwatch) 499 close(stopetcd) 500 }) 501 502 return stop, nil 503 */ 504 } 505 506 // checkRetry overrides etcd.DefaultCheckRetry. 507 // 508 // It adds configurable number of retries and configurable timesouts. 509 func checkRetry(c *etcd.Cluster, numReqs int, last http.Response, err error) error { 510 if numReqs > retryCycles*len(c.Machines) { 511 return fmt.Errorf("Tried and failed %d cluster connections: %s", retryCycles, err) 512 } 513 514 switch last.StatusCode { 515 case 0: 516 return nil 517 case 500: 518 time.Sleep(retrySleep) 519 return nil 520 case 200: 521 return nil 522 default: 523 return fmt.Errorf("Unhandled HTTP Error: %s %d", last.Status, last.StatusCode) 524 } 525 }