github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/taskrunner/host_gateway.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"math/rand"
     8  	"path/filepath"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/evergreen-ci/evergreen"
    13  	"github.com/evergreen-ci/evergreen/cloud/providers"
    14  	"github.com/evergreen-ci/evergreen/command"
    15  	"github.com/evergreen-ci/evergreen/model/distro"
    16  	"github.com/evergreen-ci/evergreen/model/host"
    17  	"github.com/evergreen-ci/evergreen/util"
    18  	"github.com/mongodb/grip"
    19  	"github.com/pkg/errors"
    20  )
    21  
// Timeouts applied to each remote step of starting an agent, plus the name
// used for the agent's log prefix.
const (
	// MakeShellTimeout bounds the remote mkdir command that creates the
	// distro's working directory.
	MakeShellTimeout  = 2 * time.Minute
	// SCPTimeout bounds copying the agent binary to the remote host.
	SCPTimeout        = 3 * time.Minute
	// StartAgentTimeout bounds the remote command that launches the agent.
	StartAgentTimeout = 2 * time.Minute
	// agentFile is joined with the distro's working directory and passed to
	// the agent as its -log_prefix.
	agentFile         = "agent"
)
    28  
// HostGateway is responsible for kicking off agents on remote machines.
type HostGateway interface {
	// StartAgentOnHost starts the agent on the specified host, returning an
	// error if any step fails. It does not return the agent revision; use
	// GetAgentRevision for that.
	StartAgentOnHost(*evergreen.Settings, host.Host) error
	// GetAgentRevision gets the current revision of the agent.
	GetAgentRevision() (string, error)
}
    37  
// AgentHostGateway is the HostGateway implementation that copies a compiled
// agent binary over to a remote host and starts it there.
type AgentHostGateway struct {
	// ExecutablesDir is the local directory holding the compiled agent
	// executables (under a per-architecture subdirectory) and the "version"
	// file read by GetAgentRevision.
	ExecutablesDir string
}
    44  
    45  // Start the task specified, on the host specified.  First runs any necessary
    46  // preparation on the remote machine, then kicks off the agent process on the
    47  // machine.
    48  // Returns an error if any step along the way fails.
    49  func (agbh *AgentHostGateway) StartAgentOnHost(settings *evergreen.Settings, hostObj host.Host) error {
    50  
    51  	// get the host's SSH options
    52  	cloudHost, err := providers.GetCloudHost(&hostObj, settings)
    53  	if err != nil {
    54  		return errors.Wrapf(err, "Failed to get cloud host for %s", hostObj.Id)
    55  	}
    56  	sshOptions, err := cloudHost.GetSSHOptions()
    57  	if err != nil {
    58  		return errors.Wrapf(err, "Error getting ssh options for host %s", hostObj.Id)
    59  	}
    60  
    61  	// prep the remote host
    62  	grip.Infof("Prepping remote host %v...", hostObj.Id)
    63  	agentRevision, err := agbh.prepRemoteHost(hostObj, sshOptions)
    64  	if err != nil {
    65  		return errors.Wrapf(err, "error prepping remote host %s", hostObj.Id)
    66  	}
    67  	grip.Infof("Prepping host %v finished successfully", hostObj.Id)
    68  
    69  	// start the agent on the remote machine
    70  	grip.Infof("Starting agent on host %v", hostObj.Id)
    71  
    72  	// generate the host secret if none exists
    73  	if hostObj.Secret == "" {
    74  		if err = hostObj.CreateSecret(); err != nil {
    75  			return errors.Wrapf(err, "creating secret for %s", hostObj.Id)
    76  		}
    77  	}
    78  
    79  	err = startAgentOnRemote(settings.ApiUrl, &hostObj, sshOptions)
    80  	if err != nil {
    81  		return errors.WithStack(err)
    82  	}
    83  	grip.Infof("Agent successfully started for host %v", hostObj.Id)
    84  
    85  	err = hostObj.SetAgentRevision(agentRevision)
    86  	if err != nil {
    87  		return errors.WithStack(err)
    88  	}
    89  	return nil
    90  }
    91  
    92  // Gets the git revision of the currently built agent
    93  func (agbh *AgentHostGateway) GetAgentRevision() (string, error) {
    94  
    95  	versionFile := filepath.Join(agbh.ExecutablesDir, "version")
    96  	hashBytes, err := ioutil.ReadFile(versionFile)
    97  	if err != nil {
    98  		return "", errors.Wrap(err, "error reading agent version file")
    99  	}
   100  
   101  	return strings.TrimSpace(string(hashBytes)), nil
   102  }
   103  
   104  // executableSubPath returns the directory containing the compiled agents.
   105  func executableSubPath(id string) (string, error) {
   106  
   107  	// get the full distro info, so we can figure out the architecture
   108  	d, err := distro.FindOne(distro.ById(id))
   109  	if err != nil {
   110  		return "", errors.Wrapf(err, "error finding distro %v", id)
   111  	}
   112  
   113  	mainName := "main"
   114  	if strings.HasPrefix(d.Arch, "windows") {
   115  		mainName = "main.exe"
   116  	}
   117  
   118  	return filepath.Join(d.Arch, mainName), nil
   119  }
   120  
   121  func newCappedOutputLog() *util.CappedWriter {
   122  	// store up to 1MB of streamed command output to print if a command fails
   123  	return &util.CappedWriter{
   124  		Buffer:   &bytes.Buffer{},
   125  		MaxBytes: 1024 * 1024, // 1MB
   126  	}
   127  }
   128  
   129  // Prepare the remote machine to run a task.
   130  // This consists of:
   131  // 1. Creating the directories on the remote host where, according to the distro's settings,
   132  //    the agent should be placed.
   133  // 2. Copying the agent into that directory.
   134  func (agbh *AgentHostGateway) prepRemoteHost(hostObj host.Host, sshOptions []string) (string, error) {
   135  	// compute any info necessary to ssh into the host
   136  	hostInfo, err := util.ParseSSHInfo(hostObj.Host)
   137  	if err != nil {
   138  		return "", errors.Wrapf(err, "error parsing ssh info %v", hostObj.Host)
   139  	}
   140  
   141  	// first, create the necessary sandbox of directories on the remote machine
   142  	mkdirOutput := newCappedOutputLog()
   143  	makeShellCmd := &command.RemoteCommand{
   144  		Id:             fmt.Sprintf("agent_mkdir-%v", rand.Int()),
   145  		CmdString:      fmt.Sprintf("mkdir -m 777 -p %v", hostObj.Distro.WorkDir),
   146  		Stdout:         mkdirOutput,
   147  		Stderr:         mkdirOutput,
   148  		RemoteHostName: hostInfo.Hostname,
   149  		User:           hostObj.User,
   150  		Options:        append([]string{"-p", hostInfo.Port}, sshOptions...),
   151  		Background:     false,
   152  	}
   153  	grip.Infof("Directories command: '%#v'", makeShellCmd)
   154  
   155  	// run the make shell command with a timeout
   156  	err = util.RunFunctionWithTimeout(makeShellCmd.Run, MakeShellTimeout)
   157  	grip.Notice(makeShellCmd.Stop())
   158  	if err != nil {
   159  		// if it timed out, kill the command
   160  		if err == util.ErrTimedOut {
   161  			return "", errors.Errorf("creating remote directories timed out: %v",
   162  				mkdirOutput.String())
   163  		}
   164  		return "", errors.Wrapf(err, "error creating directories on remote machine (%s)",
   165  			mkdirOutput.String())
   166  	}
   167  
   168  	// third, copy over the correct agent binary to the remote machine
   169  	execSubPath, err := executableSubPath(hostObj.Distro.Id)
   170  	if err != nil {
   171  		return "", errors.Wrap(err, "error computing subpath to executable")
   172  	}
   173  
   174  	scpAgentOutput := newCappedOutputLog()
   175  	scpAgentCmd := &command.ScpCommand{
   176  		Id:             fmt.Sprintf("scp%v", rand.Int()),
   177  		Source:         filepath.Join(agbh.ExecutablesDir, execSubPath),
   178  		Dest:           hostObj.Distro.WorkDir,
   179  		Stdout:         scpAgentOutput,
   180  		Stderr:         scpAgentOutput,
   181  		RemoteHostName: hostInfo.Hostname,
   182  		User:           hostObj.User,
   183  		Options:        append([]string{"-P", hostInfo.Port}, sshOptions...),
   184  	}
   185  
   186  	// get the agent's revision before scp'ing over the executable
   187  	preSCPAgentRevision, err := agbh.GetAgentRevision()
   188  	grip.Error(errors.Wrap(err, "Error getting pre scp agent revision"))
   189  
   190  	// run the command to scp the agent with a timeout
   191  	err = util.RunFunctionWithTimeout(scpAgentCmd.Run, SCPTimeout)
   192  	grip.Notice(scpAgentCmd.Stop())
   193  	if err != nil {
   194  		if err == util.ErrTimedOut {
   195  			return "", errors.Errorf("scp-ing agent binary timed out: %v", scpAgentOutput.String())
   196  		}
   197  		return "", errors.Errorf(
   198  			"error copying agent binary to remote machine (%v): %v", err, scpAgentOutput.String())
   199  	}
   200  
   201  	// get the agent's revision after scp'ing over the executable
   202  	postSCPAgentRevision, err := agbh.GetAgentRevision()
   203  	grip.Error(errors.Wrap(err, "Error getting post scp agent revision"))
   204  	grip.WarningWhenf(preSCPAgentRevision != postSCPAgentRevision,
   205  		"Agent revision was %v before scp but is now %v. Using previous revision %v for host %v",
   206  		preSCPAgentRevision, postSCPAgentRevision, preSCPAgentRevision, hostObj.Id)
   207  
   208  	return preSCPAgentRevision, nil
   209  }
   210  
   211  // Start the agent process on the specified remote host, and have it run the specified task.
   212  func startAgentOnRemote(apiURL string, hostObj *host.Host, sshOptions []string) error {
   213  	// the path to the agent binary on the remote machine
   214  	pathToExecutable := filepath.Join(hostObj.Distro.WorkDir, "main")
   215  
   216  	// build the command to run on the remote machine
   217  	remoteCmd := fmt.Sprintf(
   218  		`%v -api_server "%v" -host_id "%v" -host_secret "%v" -log_prefix "%v" -https_cert "%v"`,
   219  		pathToExecutable, apiURL, hostObj.Id, hostObj.Secret,
   220  		filepath.Join(hostObj.Distro.WorkDir, agentFile), "")
   221  	grip.Info(remoteCmd)
   222  
   223  	// compute any info necessary to ssh into the host
   224  	hostInfo, err := util.ParseSSHInfo(hostObj.Host)
   225  	if err != nil {
   226  		return errors.Wrapf(err, "error parsing ssh info %v", hostObj.Host)
   227  	}
   228  
   229  	// run the command to kick off the agent remotely
   230  	var startAgentLog bytes.Buffer
   231  	startAgentCmd := &command.RemoteCommand{
   232  		Id:             fmt.Sprintf("startagent-%s-%d", hostObj.Id, rand.Int()),
   233  		CmdString:      remoteCmd,
   234  		Stdout:         &startAgentLog,
   235  		Stderr:         &startAgentLog,
   236  		RemoteHostName: hostInfo.Hostname,
   237  		User:           hostObj.User,
   238  		Options:        append([]string{"-p", hostInfo.Port}, sshOptions...),
   239  		Background:     true,
   240  	}
   241  
   242  	// run the command to start the agent with a timeout
   243  	err = util.RunFunctionWithTimeout(
   244  		startAgentCmd.Run,
   245  		StartAgentTimeout,
   246  	)
   247  
   248  	// run cleanup regardless of what happens.
   249  	grip.Notice(startAgentCmd.Stop())
   250  
   251  	if err != nil {
   252  		if err == util.ErrTimedOut {
   253  			return errors.Errorf("starting agent timed out on %s", hostObj.Id)
   254  		}
   255  		return errors.Wrapf(err, "error starting agent (%v): %v", hostObj.Id, startAgentLog.String())
   256  	}
   257  	return nil
   258  }