github.com/justinjmoses/evergreen@v0.0.0-20170530173719-1d50e381ff0d/taskrunner/host_gateway.go (about) 1 package taskrunner 2 3 import ( 4 "bytes" 5 "fmt" 6 "io/ioutil" 7 "math/rand" 8 "path/filepath" 9 "strings" 10 "time" 11 12 "github.com/evergreen-ci/evergreen" 13 "github.com/evergreen-ci/evergreen/cloud/providers" 14 "github.com/evergreen-ci/evergreen/command" 15 "github.com/evergreen-ci/evergreen/model/distro" 16 "github.com/evergreen-ci/evergreen/model/host" 17 "github.com/evergreen-ci/evergreen/util" 18 "github.com/mongodb/grip" 19 "github.com/pkg/errors" 20 ) 21 22 const ( 23 MakeShellTimeout = 2 * time.Minute 24 SCPTimeout = 3 * time.Minute 25 StartAgentTimeout = 2 * time.Minute 26 agentFile = "agent" 27 ) 28 29 // HostGateway is responsible for kicking off tasks on remote machines. 30 type HostGateway interface { 31 // run the specified task on the specified host, return the revision of the 32 // agent running the task on that host 33 StartAgentOnHost(*evergreen.Settings, host.Host) error 34 // gets the current revision of the agent 35 GetAgentRevision() (string, error) 36 } 37 38 // Implementation of the HostGateway that builds and copies over the MCI 39 // agent to run tasks. 40 type AgentHostGateway struct { 41 // Destination directory for the agent executables 42 ExecutablesDir string 43 } 44 45 // Start the task specified, on the host specified. First runs any necessary 46 // preparation on the remote machine, then kicks off the agent process on the 47 // machine. 48 // Returns an error if any step along the way fails. 49 func (agbh *AgentHostGateway) StartAgentOnHost(settings *evergreen.Settings, hostObj host.Host) error { 50 51 // get the host's SSH options 52 cloudHost, err := providers.GetCloudHost(&hostObj, settings) 53 if err != nil { 54 return errors.Wrapf(err, "Failed to get cloud host for %s", hostObj.Id) 55 } 56 sshOptions, err := cloudHost.GetSSHOptions() 57 if err != nil { 58 return errors.Wrapf(err, "Error getting ssh options for host %s", hostObj.Id) 59 } 60 61 // prep the remote host 62 grip.Infof("Prepping remote host %v...", hostObj.Id) 63 agentRevision, err := agbh.prepRemoteHost(hostObj, sshOptions) 64 if err != nil { 65 return errors.Wrapf(err, "error prepping remote host %s", hostObj.Id) 66 } 67 grip.Infof("Prepping host %v finished successfully", hostObj.Id) 68 69 // start the agent on the remote machine 70 grip.Infof("Starting agent on host %v", hostObj.Id) 71 72 // generate the host secret if none exists 73 if hostObj.Secret == "" { 74 if err = hostObj.CreateSecret(); err != nil { 75 return errors.Wrapf(err, "creating secret for %s", hostObj.Id) 76 } 77 } 78 79 err = startAgentOnRemote(settings.ApiUrl, &hostObj, sshOptions) 80 if err != nil { 81 return errors.WithStack(err) 82 } 83 grip.Infof("Agent successfully started for host %v", hostObj.Id) 84 85 err = hostObj.SetAgentRevision(agentRevision) 86 if err != nil { 87 return errors.WithStack(err) 88 } 89 return nil 90 } 91 92 // Gets the git revision of the currently built agent 93 func (agbh *AgentHostGateway) GetAgentRevision() (string, error) { 94 95 versionFile := filepath.Join(agbh.ExecutablesDir, "version") 96 hashBytes, err := ioutil.ReadFile(versionFile) 97 if err != nil { 98 return "", errors.Wrap(err, "error reading agent version file") 99 } 100 101 return strings.TrimSpace(string(hashBytes)), nil 102 } 103 104 // executableSubPath returns the directory containing the compiled agents. 105 func executableSubPath(id string) (string, error) { 106 107 // get the full distro info, so we can figure out the architecture 108 d, err := distro.FindOne(distro.ById(id)) 109 if err != nil { 110 return "", errors.Wrapf(err, "error finding distro %v", id) 111 } 112 113 mainName := "main" 114 if strings.HasPrefix(d.Arch, "windows") { 115 mainName = "main.exe" 116 } 117 118 return filepath.Join(d.Arch, mainName), nil 119 } 120 121 func newCappedOutputLog() *util.CappedWriter { 122 // store up to 1MB of streamed command output to print if a command fails 123 return &util.CappedWriter{ 124 Buffer: &bytes.Buffer{}, 125 MaxBytes: 1024 * 1024, // 1MB 126 } 127 } 128 129 // Prepare the remote machine to run a task. 130 // This consists of: 131 // 1. Creating the directories on the remote host where, according to the distro's settings, 132 // the agent should be placed. 133 // 2. Copying the agent into that directory. 134 func (agbh *AgentHostGateway) prepRemoteHost(hostObj host.Host, sshOptions []string) (string, error) { 135 // compute any info necessary to ssh into the host 136 hostInfo, err := util.ParseSSHInfo(hostObj.Host) 137 if err != nil { 138 return "", errors.Wrapf(err, "error parsing ssh info %v", hostObj.Host) 139 } 140 141 // first, create the necessary sandbox of directories on the remote machine 142 mkdirOutput := newCappedOutputLog() 143 makeShellCmd := &command.RemoteCommand{ 144 Id: fmt.Sprintf("agent_mkdir-%v", rand.Int()), 145 CmdString: fmt.Sprintf("mkdir -m 777 -p %v", hostObj.Distro.WorkDir), 146 Stdout: mkdirOutput, 147 Stderr: mkdirOutput, 148 RemoteHostName: hostInfo.Hostname, 149 User: hostObj.User, 150 Options: append([]string{"-p", hostInfo.Port}, sshOptions...), 151 Background: false, 152 } 153 grip.Infof("Directories command: '%#v'", makeShellCmd) 154 155 // run the make shell command with a timeout 156 err = util.RunFunctionWithTimeout(makeShellCmd.Run, MakeShellTimeout) 157 grip.Notice(makeShellCmd.Stop()) 158 if err != nil { 159 // if it timed out, kill the command 160 if err == util.ErrTimedOut { 161 return "", errors.Errorf("creating remote directories timed out: %v", 162 mkdirOutput.String()) 163 } 164 return "", errors.Wrapf(err, "error creating directories on remote machine (%s)", 165 mkdirOutput.String()) 166 } 167 168 // third, copy over the correct agent binary to the remote machine 169 execSubPath, err := executableSubPath(hostObj.Distro.Id) 170 if err != nil { 171 return "", errors.Wrap(err, "error computing subpath to executable") 172 } 173 174 scpAgentOutput := newCappedOutputLog() 175 scpAgentCmd := &command.ScpCommand{ 176 Id: fmt.Sprintf("scp%v", rand.Int()), 177 Source: filepath.Join(agbh.ExecutablesDir, execSubPath), 178 Dest: hostObj.Distro.WorkDir, 179 Stdout: scpAgentOutput, 180 Stderr: scpAgentOutput, 181 RemoteHostName: hostInfo.Hostname, 182 User: hostObj.User, 183 Options: append([]string{"-P", hostInfo.Port}, sshOptions...), 184 } 185 186 // get the agent's revision before scp'ing over the executable 187 preSCPAgentRevision, err := agbh.GetAgentRevision() 188 grip.Error(errors.Wrap(err, "Error getting pre scp agent revision")) 189 190 // run the command to scp the agent with a timeout 191 err = util.RunFunctionWithTimeout(scpAgentCmd.Run, SCPTimeout) 192 grip.Notice(scpAgentCmd.Stop()) 193 if err != nil { 194 if err == util.ErrTimedOut { 195 return "", errors.Errorf("scp-ing agent binary timed out: %v", scpAgentOutput.String()) 196 } 197 return "", errors.Errorf( 198 "error copying agent binary to remote machine (%v): %v", err, scpAgentOutput.String()) 199 } 200 201 // get the agent's revision after scp'ing over the executable 202 postSCPAgentRevision, err := agbh.GetAgentRevision() 203 grip.Error(errors.Wrap(err, "Error getting post scp agent revision")) 204 grip.WarningWhenf(preSCPAgentRevision != postSCPAgentRevision, 205 "Agent revision was %v before scp but is now %v. Using previous revision %v for host %v", 206 preSCPAgentRevision, postSCPAgentRevision, preSCPAgentRevision, hostObj.Id) 207 208 return preSCPAgentRevision, nil 209 } 210 211 // Start the agent process on the specified remote host, and have it run the specified task. 212 func startAgentOnRemote(apiURL string, hostObj *host.Host, sshOptions []string) error { 213 // the path to the agent binary on the remote machine 214 pathToExecutable := filepath.Join(hostObj.Distro.WorkDir, "main") 215 216 // build the command to run on the remote machine 217 remoteCmd := fmt.Sprintf( 218 `%v -api_server "%v" -host_id "%v" -host_secret "%v" -log_prefix "%v" -https_cert "%v"`, 219 pathToExecutable, apiURL, hostObj.Id, hostObj.Secret, 220 filepath.Join(hostObj.Distro.WorkDir, agentFile), "") 221 grip.Info(remoteCmd) 222 223 // compute any info necessary to ssh into the host 224 hostInfo, err := util.ParseSSHInfo(hostObj.Host) 225 if err != nil { 226 return errors.Wrapf(err, "error parsing ssh info %v", hostObj.Host) 227 } 228 229 // run the command to kick off the agent remotely 230 var startAgentLog bytes.Buffer 231 startAgentCmd := &command.RemoteCommand{ 232 Id: fmt.Sprintf("startagent-%s-%d", hostObj.Id, rand.Int()), 233 CmdString: remoteCmd, 234 Stdout: &startAgentLog, 235 Stderr: &startAgentLog, 236 RemoteHostName: hostInfo.Hostname, 237 User: hostObj.User, 238 Options: append([]string{"-p", hostInfo.Port}, sshOptions...), 239 Background: true, 240 } 241 242 // run the command to start the agent with a timeout 243 err = util.RunFunctionWithTimeout( 244 startAgentCmd.Run, 245 StartAgentTimeout, 246 ) 247 248 // run cleanup regardless of what happens. 249 grip.Notice(startAgentCmd.Stop()) 250 251 if err != nil { 252 if err == util.ErrTimedOut { 253 return errors.Errorf("starting agent timed out on %s", hostObj.Id) 254 } 255 return errors.Wrapf(err, "error starting agent (%v): %v", hostObj.Id, startAgentLog.String()) 256 } 257 return nil 258 }