github.com/technosophos/deis@v1.7.1-0.20150915173815-f9005256004b/builder/etcd/etcd.go (about) 1 /*Package etcd is a library for performing common Etcd tasks. 2 */ 3 package etcd 4 5 import ( 6 "errors" 7 "fmt" 8 "io/ioutil" 9 "net/http" 10 "os" 11 "os/exec" 12 "strings" 13 "time" 14 15 "github.com/Masterminds/cookoo" 16 "github.com/Masterminds/cookoo/log" 17 "github.com/Masterminds/cookoo/safely" 18 "github.com/coreos/go-etcd/etcd" 19 ) 20 21 var ( 22 retryCycles = 2 23 retrySleep = 200 * time.Millisecond 24 ) 25 26 // Getter describes the Get behavior of an Etcd client. 27 // 28 // Usually you will want to use go-etcd/etcd.Client to satisfy this. 29 // 30 // We use an interface because it is more testable. 31 type Getter interface { 32 Get(string, bool, bool) (*etcd.Response, error) 33 } 34 35 // DirCreator describes etcd's CreateDir behavior. 36 // 37 // Usually you will want to use go-etcd/etcd.Client to satisfy this. 38 type DirCreator interface { 39 CreateDir(string, uint64) (*etcd.Response, error) 40 } 41 42 // Watcher watches an etcd entry. 43 type Watcher interface { 44 Watch(string, uint64, bool, chan *etcd.Response, chan bool) (*etcd.Response, error) 45 } 46 47 // Setter sets a value in Etcd. 48 type Setter interface { 49 Set(string, string, uint64) (*etcd.Response, error) 50 } 51 52 // GetterSetter performs get and set operations. 53 type GetterSetter interface { 54 Getter 55 Setter 56 } 57 58 // CreateClient creates a new Etcd client and prepares it for work. 59 // 60 // Params: 61 // - url (string): A server to connect to. 62 // - retries (int): Number of times to retry a connection to the server 63 // - retrySleep (time.Duration): How long to sleep between retries 64 // 65 // Returns: 66 // This puts an *etcd.Client into the context. 67 func CreateClient(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 68 url := p.Get("url", "http://localhost:4001").(string) 69 70 // Backed this out because it's unnecessary so far. 71 //hosts := p.Get("urls", []string{"http://localhost:4001"}).([]string) 72 hosts := []string{url} 73 retryCycles = p.Get("retries", retryCycles).(int) 74 retrySleep = p.Get("retrySleep", retrySleep).(time.Duration) 75 76 // Support `host:port` format, too. 77 for i, host := range hosts { 78 if !strings.Contains(host, "://") { 79 hosts[i] = "http://" + host 80 } 81 } 82 83 client := etcd.NewClient(hosts) 84 client.CheckRetry = checkRetry 85 86 return client, nil 87 } 88 89 // Get performs an etcd Get operation. 90 // 91 // Params: 92 // - client (EtcdGetter): Etcd client 93 // - path (string): The path/key to fetch 94 // 95 // Returns: 96 // - This puts an `etcd.Response` into the context, and returns an error 97 // if the client could not connect. 98 func Get(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 99 cli, ok := p.Has("client") 100 if !ok { 101 return nil, errors.New("No Etcd client found.") 102 } 103 client := cli.(Getter) 104 path := p.Get("path", "/").(string) 105 106 res, err := client.Get(path, false, false) 107 if err != nil { 108 return res, err 109 } 110 111 if !res.Node.Dir { 112 return res, fmt.Errorf("Expected / to be a dir.") 113 } 114 return res, nil 115 } 116 117 // IsRunning checks to see if etcd is running. 118 // 119 // It will test `count` times before giving up. 120 // 121 // Params: 122 // - client (EtcdGetter) 123 // - count (int): Number of times to try before giving up. 124 // 125 // Returns: 126 // boolean true if etcd is listening. 127 func IsRunning(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 128 client := p.Get("client", nil).(Getter) 129 count := p.Get("count", 20).(int) 130 for i := 0; i < count; i++ { 131 _, err := client.Get("/", false, false) 132 if err == nil { 133 return true, nil 134 } 135 log.Infof(c, "Waiting for etcd to come online.") 136 time.Sleep(250 * time.Millisecond) 137 } 138 log.Errf(c, "Etcd is not answering after %d attempts.", count) 139 return false, &cookoo.FatalError{"Could not connect to Etcd."} 140 } 141 142 // Set sets a value in etcd. 143 // 144 // Params: 145 // - key (string): The key 146 // - value (string): The value 147 // - ttl (uint64): Time to live 148 // - client (EtcdGetter): Client, usually an *etcd.Client. 149 // 150 // Returns: 151 // - *etcd.Result 152 func Set(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 153 key := p.Get("key", "").(string) 154 value := p.Get("value", "").(string) 155 ttl := p.Get("ttl", uint64(20)).(uint64) 156 client := p.Get("client", nil).(Setter) 157 158 res, err := client.Set(key, value, ttl) 159 if err != nil { 160 log.Infof(c, "Failed to set %s=%s", key, value) 161 return res, err 162 } 163 164 return res, nil 165 } 166 167 // FindSSHUser finds an SSH user by public key. 168 // 169 // Some parts of the system require that we know not only the SSH key, but also 170 // the name of the user. That information is stored in etcd. 171 // 172 // Params: 173 // - client (EtcdGetter) 174 // - fingerprint (string): The fingerprint of the SSH key. 175 // 176 // Returns: 177 // - username (string) 178 func FindSSHUser(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 179 client := p.Get("client", nil).(Getter) 180 fingerprint := p.Get("fingerprint", nil).(string) 181 182 res, err := client.Get("/deis/builder/users", false, true) 183 if err != nil { 184 log.Warnf(c, "Error querying etcd: %s", err) 185 return "", err 186 } else if res.Node == nil || !res.Node.Dir { 187 log.Warnf(c, "No users found in etcd.") 188 return "", errors.New("Users not found") 189 } 190 for _, user := range res.Node.Nodes { 191 log.Infof(c, "Checking user %s", user.Key) 192 for _, keyprint := range user.Nodes { 193 if strings.HasSuffix(keyprint.Key, fingerprint) { 194 parts := strings.Split(user.Key, "/") 195 username := parts[len(parts)-1] 196 log.Infof(c, "Found user %s for fingerprint %s", username, fingerprint) 197 return username, nil 198 } 199 } 200 } 201 202 return "", fmt.Errorf("User not found for fingerprint %s", fingerprint) 203 } 204 205 // StoreHostKeys stores SSH hostkeys locally. 206 // 207 // First it tries to fetch them from etcd. If the keys are not present there, 208 // it generates new ones and then puts them into etcd. 209 // 210 // Params: 211 // - client(EtcdGetterSetter) 212 // - ciphers([]string): A list of ciphers to generate. Defaults are dsa, 213 // ecdsa, ed25519 and rsa. 214 // - basepath (string): Base path in etcd (ETCD_PATH). 215 // Returns: 216 // 217 func StoreHostKeys(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 218 defaultCiphers := []string{"rsa", "dsa", "ecdsa", "ed25519"} 219 client := p.Get("client", nil).(GetterSetter) 220 ciphers := p.Get("ciphers", defaultCiphers).([]string) 221 basepath := p.Get("basepath", "/deis/builder").(string) 222 223 res, err := client.Get("sshHostKey", false, false) 224 if err != nil || res.Node == nil { 225 log.Infof(c, "Could not get SSH host key from etcd. Generating new ones.") 226 if err := genSSHKeys(c); err != nil { 227 log.Err(c, "Failed to generate SSH keys. Aborting.") 228 return nil, err 229 } 230 if err := keysToEtcd(c, client, ciphers, basepath); err != nil { 231 return nil, err 232 } 233 } else if err := keysToLocal(c, client, ciphers, basepath); err != nil { 234 log.Infof(c, "Fetching SSH host keys from etcd.") 235 return nil, err 236 } 237 238 return nil, nil 239 } 240 241 // keysToLocal copies SSH host keys from etcd to the local file system. 242 // 243 // This only fails if the main key, sshHostKey cannot be stored or retrieved. 244 func keysToLocal(c cookoo.Context, client Getter, ciphers []string, etcdPath string) error { 245 lpath := "/etc/ssh/ssh_host_%s_key" 246 privkey := "%s/sshHost%sKey" 247 for _, cipher := range ciphers { 248 path := fmt.Sprintf(lpath, cipher) 249 key := fmt.Sprintf(privkey, etcdPath, cipher) 250 res, err := client.Get(key, false, false) 251 if err != nil || res.Node == nil { 252 continue 253 } 254 255 content := res.Node.Value 256 if err := ioutil.WriteFile(path, []byte(content), 0600); err != nil { 257 log.Errf(c, "Error writing ssh host key file: %s", err) 258 } 259 } 260 261 // Now get generic key. 262 res, err := client.Get("sshHostKey", false, false) 263 if err != nil || res.Node == nil { 264 return fmt.Errorf("Failed to get sshHostKey from etcd. %v", err) 265 } 266 267 content := res.Node.Value 268 if err := ioutil.WriteFile("/etc/ssh/ssh_host_key", []byte(content), 0600); err != nil { 269 log.Errf(c, "Error writing ssh host key file: %s", err) 270 return err 271 } 272 return nil 273 } 274 275 // keysToEtcd copies local keys into etcd. 276 // 277 // It only fails if it cannot copy ssh_host_key to sshHostKey. All other 278 // abnormal conditions are logged, but not considered to be failures. 279 func keysToEtcd(c cookoo.Context, client Setter, ciphers []string, etcdPath string) error { 280 lpath := "/etc/ssh/ssh_host_%s_key" 281 privkey := "%s/sshHost%sKey" 282 for _, cipher := range ciphers { 283 path := fmt.Sprintf(lpath, cipher) 284 key := fmt.Sprintf(privkey, etcdPath, cipher) 285 content, err := ioutil.ReadFile(path) 286 if err != nil { 287 log.Infof(c, "No key named %s", path) 288 } else if _, err := client.Set(key, string(content), 0); err != nil { 289 log.Errf(c, "Could not store ssh key in etcd: %s", err) 290 } 291 } 292 // Now we set the generic key: 293 if content, err := ioutil.ReadFile("/etc/ssh/ssh_host_key"); err != nil { 294 log.Errf(c, "Could not read the ssh_host_key file.") 295 return err 296 } else if _, err := client.Set("sshHostKey", string(content), 0); err != nil { 297 log.Errf(c, "Failed to set sshHostKey in etcd.") 298 return err 299 } 300 return nil 301 } 302 303 // genSshKeys generates the default set of SSH host keys. 304 func genSSHKeys(c cookoo.Context) error { 305 // Generate a new key 306 out, err := exec.Command("ssh-keygen", "-A").CombinedOutput() 307 if err != nil { 308 log.Infof(c, "ssh-keygen: %s", out) 309 log.Errf(c, "Failed to generate SSH keys: %s", err) 310 return err 311 } 312 return nil 313 } 314 315 // UpdateHostPort intermittently notifies etcd of the builder's address. 316 // 317 // If `port` is specified, this will notify etcd at 10 second intervals that 318 // the builder is listening at $HOST:$PORT, setting the TTL to 20 seconds. 319 // 320 // This will notify etcd as long as the local sshd is running. 321 // 322 // Params: 323 // - base (string): The base path to write the data: $base/host and $base/port. 324 // - host (string): The hostname 325 // - port (string): The port 326 // - client (Setter): The client to use to write the data to etcd. 327 // - sshPid (int): The PID for SSHD. If SSHD dies, this stops notifying. 328 func UpdateHostPort(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 329 base := p.Get("base", "").(string) 330 host := p.Get("host", "").(string) 331 port := p.Get("port", "").(string) 332 client := p.Get("client", nil).(Setter) 333 sshd := p.Get("sshdPid", 0).(int) 334 335 // If no port is specified, we don't do anything. 336 if len(port) == 0 { 337 log.Infof(c, "No external port provided. Not publishing details.") 338 return false, nil 339 } 340 341 var ttl uint64 = 20 342 343 if err := setHostPort(client, base, host, port, ttl); err != nil { 344 log.Errf(c, "Etcd error setting host/port: %s", err) 345 return false, err 346 } 347 348 // Update etcd every ten seconds with this builder's host/port. 349 safely.GoDo(c, func() { 350 ticker := time.NewTicker(10 * time.Second) 351 for range ticker.C { 352 //log.Infof(c, "Setting SSHD host/port") 353 if _, err := os.FindProcess(sshd); err != nil { 354 log.Errf(c, "Lost SSHd process: %s", err) 355 break 356 } else { 357 if err := setHostPort(client, base, host, port, ttl); err != nil { 358 log.Errf(c, "Etcd error setting host/port: %s", err) 359 break 360 } 361 } 362 } 363 ticker.Stop() 364 }) 365 366 return true, nil 367 } 368 369 func setHostPort(client Setter, base, host, port string, ttl uint64) error { 370 if _, err := client.Set(base+"/host", host, ttl); err != nil { 371 return err 372 } 373 if _, err := client.Set(base+"/port", port, ttl); err != nil { 374 return err 375 } 376 return nil 377 } 378 379 // MakeDir makes a directory in Etcd. 380 // 381 // Params: 382 // - client (EtcdDirCreator): Etcd client 383 // - path (string): The name of the directory to create. 384 // - ttl (uint64): Time to live. 385 // Returns: 386 // *etcd.Response 387 func MakeDir(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 388 name := p.Get("path", "").(string) 389 ttl := p.Get("ttl", uint64(0)).(uint64) 390 cli, ok := p.Has("client") 391 if !ok { 392 return nil, errors.New("No Etcd client found.") 393 } 394 client := cli.(DirCreator) 395 396 if len(name) == 0 { 397 return false, errors.New("Expected directory name to be more than zero characters.") 398 } 399 400 res, err := client.CreateDir(name, ttl) 401 if err != nil { 402 return res, &cookoo.RecoverableError{err.Error()} 403 } 404 405 return res, nil 406 } 407 408 // Watch watches a given path, and executes a git check-repos for each event. 409 // 410 // It starts the watcher and then returns. The watcher runs on its own 411 // goroutine. To stop the watching, send the returned channel a bool. 412 // 413 // Params: 414 // - client (Watcher): An Etcd client. 415 // - path (string): The path to watch 416 // 417 // Returns: 418 // - chan bool: Send this a message to stop the watcher. 419 func Watch(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) { 420 // etcdctl -C $ETCD watch --recursive /deis/services 421 path := p.Get("path", "/deis/services").(string) 422 cli, ok := p.Has("client") 423 if !ok { 424 return nil, errors.New("No etcd client found.") 425 } 426 client := cli.(Watcher) 427 428 // Stupid hack because etcd watch seems to be broken, constantly complaining 429 // that the JSON it received is malformed. 430 safely.GoDo(c, func() { 431 for { 432 response, err := client.Watch(path, 0, true, nil, nil) 433 if err != nil { 434 log.Errf(c, "Etcd Watch failed: %s", err) 435 time.Sleep(50 * time.Millisecond) 436 continue 437 } 438 439 if response.Node == nil { 440 log.Infof(c, "Unexpected Etcd message: %v", response) 441 } 442 git := exec.Command("/home/git/check-repos") 443 if out, err := git.CombinedOutput(); err != nil { 444 log.Errf(c, "Failed git check-repos: %s", err) 445 log.Infof(c, "Output: %s", out) 446 } 447 } 448 449 }) 450 451 return nil, nil 452 453 /* Watch seems to be broken. So we do this stupid watch loop instead. 454 receiver := make(chan *etcd.Response) 455 stop := make(chan bool) 456 // Buffer the channels so that we don't hang waiting for go-etcd to 457 // read off the channel. 458 stopetcd := make(chan bool, 1) 459 stopwatch := make(chan bool, 1) 460 461 462 // Watch for errors. 463 safely.GoDo(c, func() { 464 // When a receiver is passed in, no *Response is ever returned. Instead, 465 // Watch acts like an error channel, and receiver gets all of the messages. 466 _, err := client.Watch(path, 0, true, receiver, stopetcd) 467 if err != nil { 468 log.Infof(c, "Watcher stopped with error '%s'", err) 469 stopwatch <- true 470 //close(stopwatch) 471 } 472 }) 473 // Watch for events 474 safely.GoDo(c, func() { 475 for { 476 select { 477 case msg := <-receiver: 478 if msg.Node != nil { 479 log.Infof(c, "Received notification %s for %s", msg.Action, msg.Node.Key) 480 } else { 481 log.Infof(c, "Received unexpected etcd message: %v", msg) 482 } 483 git := exec.Command("/home/git/check-repos") 484 if out, err := git.CombinedOutput(); err != nil { 485 log.Errf(c, "Failed git check-repos: %s", err) 486 log.Infof(c, "Output: %s", out) 487 } 488 case <-stopwatch: 489 c.Logf("debug", "Received signal to stop watching events.") 490 return 491 } 492 } 493 }) 494 // Fan out stop requests. 495 safely.GoDo(c, func() { 496 <-stop 497 stopwatch <- true 498 stopetcd <- true 499 close(stopwatch) 500 close(stopetcd) 501 }) 502 503 return stop, nil 504 */ 505 } 506 507 // checkRetry overrides etcd.DefaultCheckRetry. 508 // 509 // It adds configurable number of retries and configurable timesouts. 510 func checkRetry(c *etcd.Cluster, numReqs int, last http.Response, err error) error { 511 if numReqs > retryCycles*len(c.Machines) { 512 return fmt.Errorf("Tried and failed %d cluster connections: %s", retryCycles, err) 513 } 514 515 switch last.StatusCode { 516 case 0: 517 return nil 518 case 500: 519 time.Sleep(retrySleep) 520 return nil 521 case 200: 522 return nil 523 default: 524 return fmt.Errorf("Unhandled HTTP Error: %s %d", last.Status, last.StatusCode) 525 } 526 }