// Source: github.com/billybanfield/evergreen@v0.0.0-20170525200750-eeee692790f7/hostinit/setup.go

package hostinit

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/evergreen-ci/evergreen"
	"github.com/evergreen-ci/evergreen/alerts"
	"github.com/evergreen-ci/evergreen/cloud"
	"github.com/evergreen-ci/evergreen/cloud/providers"
	"github.com/evergreen-ci/evergreen/command"
	"github.com/evergreen-ci/evergreen/hostutil"
	"github.com/evergreen-ci/evergreen/model"
	"github.com/evergreen-ci/evergreen/model/event"
	"github.com/evergreen-ci/evergreen/model/host"
	"github.com/evergreen-ci/evergreen/model/user"
	"github.com/evergreen-ci/evergreen/notify"
	"github.com/evergreen-ci/evergreen/util"
	"github.com/mongodb/grip"
	"github.com/pkg/errors"
	"gopkg.in/mgo.v2"
)

// SCPTimeout bounds a single scp of a script to a host; the script names are
// the destination file names used when the distro's setup and teardown
// scripts are copied to the host.
const (
	SCPTimeout         = time.Minute
	setupScriptName    = "setup.sh"
	teardownScriptName = "teardown.sh"
)

// Error indicating another hostinit got to the setup first.
var (
	ErrHostAlreadyInitializing = errors.New("Host already initializing")
)

// Longest duration allowed for running setup script, in seconds.
// setupReadyHosts overwrites this default with the configured
// HostInit.SSHTimeoutSeconds setting whenever that setting is positive.
var (
	SSHTimeoutSeconds = int64(300) // 5 minutes
)

// HostInit is responsible for running setup scripts on Evergreen hosts.
type HostInit struct {
	// Settings is the Evergreen configuration consulted for cloud providers,
	// expansions, SSH timeouts, and the UI/API URLs.
	Settings *evergreen.Settings
}

// setupReadyHosts runs the distro setup script of all hosts that are up and reachable.
54 func (init *HostInit) setupReadyHosts() error { 55 // set SSH timeout duration 56 if timeoutSecs := init.Settings.HostInit.SSHTimeoutSeconds; timeoutSecs <= 0 { 57 grip.Warningf("SSH timeout set to %vs (<= 0s) using %vs instead", timeoutSecs, SSHTimeoutSeconds) 58 } else { 59 SSHTimeoutSeconds = timeoutSecs 60 } 61 62 // find all hosts in the uninitialized state 63 uninitializedHosts, err := host.Find(host.IsUninitialized) 64 if err != nil { 65 return errors.Wrap(err, "error fetching uninitialized hosts") 66 } 67 68 grip.Debugf("There are %d uninitialized hosts", len(uninitializedHosts)) 69 70 // used for making sure we don't exit before a setup script is done 71 wg := &sync.WaitGroup{} 72 73 for _, h := range uninitializedHosts { 74 75 // check whether or not the host is ready for its setup script to be run 76 ready, err := init.IsHostReady(&h) 77 if err != nil { 78 grip.Infof("Error checking host %s for readiness: %+v", h.Id, err) 79 continue 80 } 81 82 // if the host isn't ready (for instance, it might not be up yet), skip it 83 if !ready { 84 grip.Debugf("Host %s not ready for setup", h.Id) 85 continue 86 } 87 88 grip.Infoln("Running setup script for host", h.Id) 89 90 // kick off the setup, in its own goroutine, so pending setups don't have 91 // to wait for it to finish 92 wg.Add(1) 93 go func(h host.Host) { 94 95 if err := init.ProvisionHost(&h); err != nil { 96 grip.Errorf("Error provisioning host %s: %+v", h.Id, err) 97 98 // notify the admins of the failure 99 subject := fmt.Sprintf("%v Evergreen provisioning failure on %v", 100 notify.ProvisionFailurePreface, h.Distro.Id) 101 hostLink := fmt.Sprintf("%v/host/%v", init.Settings.Ui.Url, h.Id) 102 message := fmt.Sprintf("Provisioning failed on %v host -- %v: see %v", 103 h.Distro.Id, h.Id, hostLink) 104 if err := notify.NotifyAdmins(subject, message, init.Settings); err != nil { 105 grip.Errorf("Error sending email: %+v", err) 106 } 107 } 108 109 wg.Done() 110 111 }(h) 112 113 } 114 115 // let all setup 
routines finish 116 wg.Wait() 117 118 return nil 119 } 120 121 // IsHostReady returns whether or not the specified host is ready for its setup script 122 // to be run. 123 func (init *HostInit) IsHostReady(host *host.Host) (bool, error) { 124 125 // fetch the appropriate cloud provider for the host 126 cloudMgr, err := providers.GetCloudManager(host.Distro.Provider, init.Settings) 127 if err != nil { 128 return false, errors.Wrapf(err, "failed to get cloud manager for provider %s", 129 host.Distro.Provider) 130 } 131 132 // ask for the instance's status 133 hostStatus, err := cloudMgr.GetInstanceStatus(host) 134 if err != nil { 135 return false, errors.Wrapf(err, "error checking instance status of host %s", host.Id) 136 } 137 138 grip.Debugf("Checking readiness for %s with DNS %s. has cloud status %s and local status: %s", 139 host.Id, host.Host, hostStatus, host.Status) 140 141 // if the host has failed, terminate it and return that this host is not ready 142 if hostStatus == cloud.StatusFailed { 143 err = errors.WithStack(cloudMgr.TerminateInstance(host)) 144 if err != nil { 145 return false, err 146 } 147 return false, errors.Errorf("host %s terminated due to failure before setup", host.Id) 148 } 149 150 // if the host isn't up yet, we can't do anything 151 if hostStatus != cloud.StatusRunning { 152 return false, nil 153 } 154 155 // set the host's dns name, if it is not set 156 if host.Host == "" { 157 var hostDNS string 158 159 // get the DNS name for the host 160 hostDNS, err = cloudMgr.GetDNSName(host) 161 if err != nil { 162 return false, errors.Wrapf(err, "error checking DNS name for host %s", host.Id) 163 } 164 165 // sanity check for the host DNS name 166 if hostDNS == "" { 167 return false, errors.Errorf("instance %s is running but not returning a DNS name", 168 host.Id) 169 } 170 171 // update the host's DNS name 172 if err = host.SetDNSName(hostDNS); err != nil { 173 return false, errors.Wrapf(err, "error setting DNS name for host %s", host.Id) 174 } 
175 } 176 177 // check if the host is reachable via SSH 178 cloudHost, err := providers.GetCloudHost(host, init.Settings) 179 if err != nil { 180 return false, errors.Wrapf(err, "failed to get cloud host for %s", host.Id) 181 } 182 reachable, err := cloudHost.IsSSHReachable() 183 if err != nil { 184 return false, errors.Wrapf(err, "error checking if host %s is reachable", host.Id) 185 } 186 187 // at this point, we can run the setup if the host is reachable 188 return reachable, nil 189 } 190 191 // setupHost runs the specified setup script for an individual host. Returns 192 // the output from running the script remotely, as well as any error that 193 // occurs. If the script exits with a non-zero exit code, the error will be non-nil. 194 func (init *HostInit) setupHost(targetHost *host.Host) (string, error) { 195 // fetch the appropriate cloud provider for the host 196 cloudMgr, err := providers.GetCloudManager(targetHost.Provider, init.Settings) 197 if err != nil { 198 return "", errors.Wrapf(err, 199 "failed to get cloud manager for host %s with provider %s", 200 targetHost.Id, targetHost.Provider) 201 } 202 203 // mark the host as initializing 204 if err = targetHost.SetInitializing(); err != nil { 205 if err == mgo.ErrNotFound { 206 return "", ErrHostAlreadyInitializing 207 } else { 208 return "", errors.Wrapf(err, "database error") 209 } 210 } 211 212 /* TESTING ONLY 213 setupDebugSSHTunnel(path_to_ssh_key, targetHost.User, targetHost.Host) 214 */ 215 216 // run the function scheduled for when the host is up 217 err = cloudMgr.OnUp(targetHost) 218 if err != nil { 219 // if this fails it is probably due to an API hiccup, so we keep going. 
220 grip.Warningf("OnUp callback failed for host '%v': '%+v'", targetHost.Id, err) 221 } 222 cloudHost, err := providers.GetCloudHost(targetHost, init.Settings) 223 if err != nil { 224 return "", errors.Wrapf(err, "failed to get cloud host for %s", targetHost.Id) 225 } 226 sshOptions, err := cloudHost.GetSSHOptions() 227 if err != nil { 228 return "", errors.Wrapf(err, "error getting ssh options for host %s", targetHost.Id) 229 } 230 231 if targetHost.Distro.Teardown != "" { 232 err = init.copyScript(targetHost, teardownScriptName, targetHost.Distro.Teardown) 233 if err != nil { 234 return "", errors.Wrapf(err, "error copying script %v to host %v", 235 teardownScriptName, targetHost.Id) 236 } 237 } 238 239 if targetHost.Distro.Setup != "" { 240 err = init.copyScript(targetHost, setupScriptName, targetHost.Distro.Setup) 241 if err != nil { 242 return "", errors.Errorf("error copying script %v to host %v: %v", 243 setupScriptName, targetHost.Id, err) 244 } 245 logs, err := hostutil.RunRemoteScript(targetHost, setupScriptName, sshOptions) 246 if err != nil { 247 return logs, errors.Errorf("error running setup script over ssh: %v", err) 248 } 249 return logs, nil 250 } 251 return "", nil 252 } 253 254 // copyScript writes a given script as file "name" to the target host. This works 255 // by creating a local copy of the script on the runner's machine, scping it over 256 // then removing the local copy. 
257 func (init *HostInit) copyScript(target *host.Host, name, script string) error { 258 // parse the hostname into the user, host and port 259 hostInfo, err := util.ParseSSHInfo(target.Host) 260 if err != nil { 261 return err 262 } 263 user := target.Distro.User 264 if hostInfo.User != "" { 265 user = hostInfo.User 266 } 267 268 // create a temp file for the script 269 file, err := ioutil.TempFile("", name) 270 if err != nil { 271 return errors.Wrap(err, "error creating temporary script file") 272 } 273 defer func() { 274 grip.Error(file.Close()) 275 grip.Error(os.Remove(file.Name())) 276 }() 277 278 expanded, err := init.expandScript(script) 279 if err != nil { 280 return errors.Wrapf(err, "error expanding script for host %s", target.Id) 281 } 282 if _, err := io.WriteString(file, expanded); err != nil { 283 return errors.Wrap(err, "error writing local script") 284 } 285 286 cloudHost, err := providers.GetCloudHost(target, init.Settings) 287 if err != nil { 288 return errors.Wrapf(err, "failed to get cloud host for %s", target.Id) 289 } 290 sshOptions, err := cloudHost.GetSSHOptions() 291 if err != nil { 292 return errors.Wrapf(err, "error getting ssh options for host %v", target.Id) 293 } 294 295 var scpCmdStderr bytes.Buffer 296 scpCmd := &command.ScpCommand{ 297 Source: file.Name(), 298 Dest: name, 299 Stdout: &scpCmdStderr, 300 Stderr: &scpCmdStderr, 301 RemoteHostName: hostInfo.Hostname, 302 User: user, 303 Options: append([]string{"-P", hostInfo.Port}, sshOptions...), 304 } 305 err = util.RunFunctionWithTimeout(scpCmd.Run, SCPTimeout) 306 if err != nil { 307 if err == util.ErrTimedOut { 308 grip.Warning(scpCmd.Stop()) 309 return errors.Wrap(err, "scp-ing script timed out") 310 } 311 return errors.Wrapf(err, "error (%v) copying script to remote machine", 312 scpCmdStderr.String()) 313 } 314 return nil 315 } 316 317 // Build the setup script that will need to be run on the specified host. 
318 func (init *HostInit) expandScript(s string) (string, error) { 319 // replace expansions in the script 320 exp := command.NewExpansions(init.Settings.Expansions) 321 script, err := exp.ExpandString(s) 322 if err != nil { 323 return "", errors.Wrap(err, "expansions error") 324 } 325 return script, err 326 } 327 328 // Provision the host, and update the database accordingly. 329 func (init *HostInit) ProvisionHost(h *host.Host) error { 330 331 // run the setup script 332 grip.Infoln("Setting up host", h.Id) 333 output, err := init.setupHost(h) 334 335 // deal with any errors that occurred while running the setup 336 if err != nil { 337 grip.Errorf("Error running setup script: %+v", err) 338 339 // another hostinit process beat us there 340 if err == ErrHostAlreadyInitializing { 341 grip.Debugln("Attempted to initialize already initializing host %s", h.Id) 342 return nil 343 } 344 345 grip.Warning(alerts.RunHostProvisionFailTriggers(h)) 346 event.LogProvisionFailed(h.Id, output) 347 348 // setup script failed, mark the host's provisioning as failed 349 if err := h.SetUnprovisioned(); err != nil { 350 grip.Errorf("unprovisioning host %s failed: %+v", h.Id, err) 351 } 352 353 return errors.Wrapf(err, "error initializing host %s", h.Id) 354 } 355 356 grip.Infof("Setup complete for host %s", h.Id) 357 358 if h.ProvisionOptions != nil && 359 h.ProvisionOptions.LoadCLI && 360 h.ProvisionOptions.OwnerId != "" { 361 grip.Infof("Uploading client binary to host %s", h.Id) 362 lcr, err := init.LoadClient(h) 363 if err != nil { 364 grip.Errorf("Failed to load client binary onto host %s: %+v", h.Id, err) 365 } else if err == nil && len(h.ProvisionOptions.TaskId) > 0 { 366 grip.Infof("Fetching data for task %s onto host %s", h.ProvisionOptions.TaskId, h.Id) 367 err = init.fetchRemoteTaskData(h.ProvisionOptions.TaskId, lcr.BinaryPath, lcr.ConfigPath, h) 368 grip.ErrorWhenf(err != nil, "Failed to fetch data onto host %s: %v", h.Id, err) 369 } 370 } 371 372 // the setup was 
successful. update the host accordingly in the database 373 if err := h.MarkAsProvisioned(); err != nil { 374 return errors.Wrapf(err, "error marking host %s as provisioned", h.Id) 375 } 376 377 grip.Infof("Host %s successfully provisioned", h.Id) 378 379 return nil 380 } 381 382 // LocateCLIBinary returns the (absolute) path to the CLI binary for the given architecture, based 383 // on the system settings. Returns an error if the file does not exist. 384 func LocateCLIBinary(settings *evergreen.Settings, architecture string) (string, error) { 385 clientsSubDir := "clients" 386 if settings.ClientBinariesDir != "" { 387 clientsSubDir = settings.ClientBinariesDir 388 } 389 390 binaryName := "evergreen" 391 if strings.HasPrefix(architecture, "windows") { 392 binaryName += ".exe" 393 } 394 395 path := filepath.Join(clientsSubDir, architecture, binaryName) 396 if !filepath.IsAbs(clientsSubDir) { 397 path = filepath.Join(evergreen.FindEvergreenHome(), path) 398 } 399 400 _, err := os.Stat(path) 401 if err != nil { 402 return path, errors.WithStack(err) 403 } 404 405 path, err = filepath.Abs(path) 406 return path, errors.WithStack(err) 407 } 408 409 // LoadClientResult indicates the locations on a target host where the CLI binary and it's config 410 // file have been written to. 411 type LoadClientResult struct { 412 BinaryPath string 413 ConfigPath string 414 } 415 416 // LoadClient places the evergreen command line client on the host, places a copy of the user's 417 // settings onto the host, and makes the binary appear in the $PATH when the user logs in. 418 // If successful, returns an instance of LoadClientResult which contains the paths where the 419 // binary and config file were written to. 
420 func (init *HostInit) LoadClient(target *host.Host) (*LoadClientResult, error) { 421 // Make sure we have the binary we want to upload - if it hasn't been built for the given 422 // architecture, fail early 423 cliBinaryPath, err := LocateCLIBinary(init.Settings, target.Distro.Arch) 424 if err != nil { 425 return nil, errors.Wrapf(err, "couldn't locate CLI binary for upload") 426 } 427 if target.ProvisionOptions == nil { 428 return nil, errors.New("ProvisionOptions is nil") 429 } 430 if target.ProvisionOptions.OwnerId == "" { 431 return nil, errors.New("OwnerId not set") 432 } 433 434 // get the information about the owner of the host 435 owner, err := user.FindOne(user.ById(target.ProvisionOptions.OwnerId)) 436 if err != nil { 437 return nil, errors.Wrapf(err, "couldn't fetch owner %v for host", target.ProvisionOptions.OwnerId) 438 } 439 440 // 1. mkdir the destination directory on the host, 441 // and modify ~/.profile so the target binary will be on the $PATH 442 targetDir := "cli_bin" 443 hostSSHInfo, err := util.ParseSSHInfo(target.Host) 444 if err != nil { 445 return nil, errors.Wrapf(err, "error parsing ssh info %s", target.Host) 446 } 447 448 cloudHost, err := providers.GetCloudHost(target, init.Settings) 449 if err != nil { 450 return nil, errors.Wrapf(err, "Failed to get cloud host for %s", target.Id) 451 } 452 sshOptions, err := cloudHost.GetSSHOptions() 453 if err != nil { 454 return nil, errors.Wrapf(err, "Error getting ssh options for host %v", target.Id) 455 } 456 sshOptions = append(sshOptions, "-o", "UserKnownHostsFile=/dev/null") 457 458 mkdirOutput := &util.CappedWriter{&bytes.Buffer{}, 1024 * 1024} 459 460 // Create the directory for the binary to be uploaded into. 461 // Also, make a best effort to add the binary's location to $PATH upon login. If we can't do 462 // this successfully, the command will still succeed, it just means that the user will have to 463 // use an absolute path (or manually set $PATH in their shell) to execute it. 
464 makeShellCmd := &command.RemoteCommand{ 465 CmdString: fmt.Sprintf("mkdir -m 777 -p ~/%s && (echo 'PATH=$PATH:~/%s' >> ~/.profile || true; echo 'PATH=$PATH:~/%s' >> ~/.bash_profile || true)", targetDir, targetDir, targetDir), 466 Stdout: mkdirOutput, 467 Stderr: mkdirOutput, 468 RemoteHostName: hostSSHInfo.Hostname, 469 User: target.User, 470 Options: append([]string{"-p", hostSSHInfo.Port}, sshOptions...), 471 } 472 473 scpOut := &util.CappedWriter{&bytes.Buffer{}, 1024 * 1024} 474 // run the make shell command with a timeout 475 err = util.RunFunctionWithTimeout(makeShellCmd.Run, 30*time.Second) 476 if err != nil { 477 return nil, errors.Wrapf(err, "error running setup command for cli, %v", 478 mkdirOutput.Buffer.String()) 479 } 480 // place the binary into the directory 481 scpSetupCmd := &command.ScpCommand{ 482 Source: cliBinaryPath, 483 Dest: fmt.Sprintf("~/%s/evergreen", targetDir), 484 Stdout: scpOut, 485 Stderr: scpOut, 486 RemoteHostName: hostSSHInfo.Hostname, 487 User: target.User, 488 Options: append([]string{"-P", hostSSHInfo.Port}, sshOptions...), 489 } 490 491 // run the command to scp the setup script with a timeout 492 err = util.RunFunctionWithTimeout(scpSetupCmd.Run, 3*time.Minute) 493 if err != nil { 494 return nil, errors.Wrapf(err, "error running SCP command for cli, %v: '%v'", scpOut.Buffer.String()) 495 } 496 497 // 4. 
Write a settings file for the user that owns the host, and scp it to the directory 498 outputStruct := model.CLISettings{ 499 User: owner.Id, 500 APIKey: owner.APIKey, 501 APIServerHost: init.Settings.ApiUrl + "/api", 502 UIServerHost: init.Settings.Ui.Url, 503 } 504 outputJSON, err := json.Marshal(outputStruct) 505 if err != nil { 506 return nil, errors.WithStack(err) 507 } 508 509 tempFileName, err := util.WriteTempFile("", outputJSON) 510 if err != nil { 511 return nil, errors.WithStack(err) 512 } 513 defer os.Remove(tempFileName) 514 515 scpYmlCommand := &command.ScpCommand{ 516 Source: tempFileName, 517 Dest: fmt.Sprintf("~/%s/.evergreen.yml", targetDir), 518 Stdout: scpOut, 519 Stderr: scpOut, 520 RemoteHostName: hostSSHInfo.Hostname, 521 User: target.User, 522 Options: append([]string{"-P", hostSSHInfo.Port}, sshOptions...), 523 } 524 err = util.RunFunctionWithTimeout(scpYmlCommand.Run, 30*time.Second) 525 if err != nil { 526 return nil, errors.Wrapf(err, "error running SCP command for evergreen.yml, %v", scpOut.Buffer.String()) 527 } 528 529 return &LoadClientResult{ 530 BinaryPath: fmt.Sprintf("~/%s/evergreen", targetDir), 531 ConfigPath: fmt.Sprintf("~/%s/.evergreen.yml", targetDir), 532 }, nil 533 } 534 535 func (init *HostInit) fetchRemoteTaskData(taskId, cliPath, confPath string, target *host.Host) error { 536 hostSSHInfo, err := util.ParseSSHInfo(target.Host) 537 if err != nil { 538 return errors.Wrapf(err, "error parsing ssh info %s", target.Host) 539 } 540 541 cloudHost, err := providers.GetCloudHost(target, init.Settings) 542 if err != nil { 543 return errors.Wrapf(err, "Failed to get cloud host for %v", target.Id) 544 } 545 sshOptions, err := cloudHost.GetSSHOptions() 546 if err != nil { 547 return errors.Wrapf(err, "Error getting ssh options for host %v", target.Id) 548 } 549 sshOptions = append(sshOptions, "-o", "UserKnownHostsFile=/dev/null") 550 551 /* TESTING ONLY 552 setupDebugSSHTunnel(path_to_ssh_keys, target.User, hostSSHInfo.Hostname) 
553 */ 554 555 // When testing, use this writer to force a copy of the output to be written to standard out so 556 // that remote command failures also show up in server log output. 557 //cmdOutput := io.MultiWriter(&util.CappedWriter{&bytes.Buffer{}, 1024 * 1024}, os.Stdout) 558 559 cmdOutput := &util.CappedWriter{&bytes.Buffer{}, 1024 * 1024} 560 makeShellCmd := &command.RemoteCommand{ 561 CmdString: fmt.Sprintf("%s -c '%s' fetch -t %s --source --artifacts --dir='%s'", cliPath, confPath, taskId, target.Distro.WorkDir), 562 Stdout: cmdOutput, 563 Stderr: cmdOutput, 564 RemoteHostName: hostSSHInfo.Hostname, 565 User: target.User, 566 Options: append([]string{"-p", hostSSHInfo.Port}, sshOptions...), 567 } 568 569 // run the make shell command with a timeout 570 return errors.WithStack(util.RunFunctionWithTimeout(makeShellCmd.Run, 15*time.Minute)) 571 }