github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/client/driver/spawn/spawn.go (about) 1 package spawn 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "io" 8 "os" 9 "os/exec" 10 "strconv" 11 "time" 12 13 "github.com/hashicorp/go-multierror" 14 "github.com/hashicorp/nomad/client/driver/structs" 15 "github.com/hashicorp/nomad/command" 16 "github.com/hashicorp/nomad/helper/discover" 17 ) 18 19 // Spawner is used to start a user command in an isolated fashion that is 20 // resistent to Nomad agent failure. 21 type Spawner struct { 22 spawn *os.Process 23 SpawnPid int 24 SpawnPpid int 25 StateFile string 26 UserPid int 27 28 // User configuration 29 UserCmd *exec.Cmd 30 Logs *Logs 31 Chroot string 32 } 33 34 // Logs is used to define the filepaths the user command's logs should be 35 // redirected to. The files do not need to exist. 36 type Logs struct { 37 Stdin, Stdout, Stderr string 38 } 39 40 // NewSpawner takes a path to a state file. This state file can be used to 41 // create a new Spawner that can be used to wait on the exit status of a 42 // process even through Nomad restarts. 43 func NewSpawner(stateFile string) *Spawner { 44 return &Spawner{StateFile: stateFile} 45 } 46 47 // SetCommand sets the user command to spawn. 48 func (s *Spawner) SetCommand(cmd *exec.Cmd) { 49 s.UserCmd = cmd 50 } 51 52 // SetLogs sets the redirection of user command log files. 53 func (s *Spawner) SetLogs(l *Logs) { 54 s.Logs = l 55 } 56 57 // SetChroot puts the user command into a chroot. 58 func (s *Spawner) SetChroot(root string) { 59 s.Chroot = root 60 } 61 62 // Spawn does a double-fork to start and isolate the user command. It takes a 63 // call-back that is invoked with the pid of the intermediary process. If the 64 // call back returns an error, the user command is not started and the spawn is 65 // cancelled. This can be used to put the process into a cgroup or jail and 66 // cancel starting the user process if that was not successful. An error is 67 // returned if the call-back returns an error or the user-command couldn't be 68 // started. 69 func (s *Spawner) Spawn(cb func(pid int) error) error { 70 bin, err := discover.NomadExecutable() 71 if err != nil { 72 return fmt.Errorf("Failed to determine the nomad executable: %v", err) 73 } 74 75 exitFile, err := os.OpenFile(s.StateFile, os.O_CREATE|os.O_WRONLY, 0666) 76 if err != nil { 77 return fmt.Errorf("Error opening file to store exit status: %v", err) 78 } 79 defer exitFile.Close() 80 81 config, err := s.spawnConfig() 82 if err != nil { 83 return err 84 } 85 86 spawn := exec.Command(bin, "spawn-daemon", config) 87 88 // Capture stdout 89 spawnStdout, err := spawn.StdoutPipe() 90 if err != nil { 91 return fmt.Errorf("Failed to capture spawn-daemon stdout: %v", err) 92 } 93 defer spawnStdout.Close() 94 95 // Capture stdin. 96 spawnStdin, err := spawn.StdinPipe() 97 if err != nil { 98 return fmt.Errorf("Failed to capture spawn-daemon stdin: %v", err) 99 } 100 defer spawnStdin.Close() 101 102 if err := spawn.Start(); err != nil { 103 return fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) 104 } 105 106 if cb != nil { 107 cbErr := cb(spawn.Process.Pid) 108 if cbErr != nil { 109 errs := new(multierror.Error) 110 errs = multierror.Append(errs, cbErr) 111 if err := s.sendAbortCommand(spawnStdin); err != nil { 112 errs = multierror.Append(errs, err) 113 } 114 115 return errs 116 } 117 } 118 119 if err := s.sendStartCommand(spawnStdin); err != nil { 120 return err 121 } 122 123 respCh := make(chan command.SpawnStartStatus, 1) 124 errCh := make(chan error, 1) 125 126 go func() { 127 var resp command.SpawnStartStatus 128 dec := json.NewDecoder(spawnStdout) 129 if err := dec.Decode(&resp); err != nil { 130 errCh <- fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) 131 } 132 respCh <- resp 133 }() 134 135 select { 136 case err := <-errCh: 137 return err 138 case resp := <-respCh: 139 if resp.ErrorMsg != "" { 140 return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) 141 } 142 s.UserPid = resp.UserPID 143 case <-time.After(5 * time.Second): 144 return fmt.Errorf("timed out waiting for response") 145 } 146 147 // Store the spawn process. 148 s.spawn = spawn.Process 149 s.SpawnPid = s.spawn.Pid 150 s.SpawnPpid = os.Getpid() 151 return nil 152 } 153 154 // spawnConfig returns a serialized config to pass to the Nomad spawn-daemon 155 // command. 156 func (s *Spawner) spawnConfig() (string, error) { 157 if s.UserCmd == nil { 158 return "", fmt.Errorf("Must specify user command") 159 } 160 161 config := command.DaemonConfig{ 162 Cmd: *s.UserCmd, 163 Chroot: s.Chroot, 164 ExitStatusFile: s.StateFile, 165 } 166 167 if s.Logs != nil { 168 config.StdoutFile = s.Logs.Stdout 169 config.StdinFile = s.Logs.Stdin 170 config.StderrFile = s.Logs.Stderr 171 } 172 173 var buffer bytes.Buffer 174 enc := json.NewEncoder(&buffer) 175 if err := enc.Encode(config); err != nil { 176 return "", fmt.Errorf("Failed to serialize configuration: %v", err) 177 } 178 179 return strconv.Quote(buffer.String()), nil 180 } 181 182 // sendStartCommand sends the necessary command to the spawn-daemon to have it 183 // start the user process. 184 func (s *Spawner) sendStartCommand(w io.Writer) error { 185 enc := json.NewEncoder(w) 186 if err := enc.Encode(true); err != nil { 187 return fmt.Errorf("Failed to serialize start command: %v", err) 188 } 189 190 return nil 191 } 192 193 // sendAbortCommand sends the necessary command to the spawn-daemon to have it 194 // abort starting the user process. This should be invoked if the spawn-daemon 195 // could not be isolated into a cgroup. 196 func (s *Spawner) sendAbortCommand(w io.Writer) error { 197 enc := json.NewEncoder(w) 198 if err := enc.Encode(false); err != nil { 199 return fmt.Errorf("Failed to serialize abort command: %v", err) 200 } 201 202 return nil 203 } 204 205 // Wait returns the exit code of the user process or an error if the wait 206 // failed. 207 func (s *Spawner) Wait() *structs.WaitResult { 208 if os.Getpid() == s.SpawnPpid { 209 return s.waitAsParent() 210 } 211 212 return s.pollWait() 213 } 214 215 // waitAsParent waits on the process if the current process was the spawner. 216 func (s *Spawner) waitAsParent() *structs.WaitResult { 217 if s.SpawnPpid != os.Getpid() { 218 return structs.NewWaitResult(-1, 0, fmt.Errorf("not the parent. Spawner parent is %v; current pid is %v", s.SpawnPpid, os.Getpid())) 219 } 220 221 // Try to reattach to the spawn. 222 if s.spawn == nil { 223 // If it can't be reattached, it means the spawn process has exited so 224 // we should just read its exit file. 225 var err error 226 if s.spawn, err = os.FindProcess(s.SpawnPid); err != nil { 227 return s.pollWait() 228 } 229 } 230 231 if _, err := s.spawn.Wait(); err != nil { 232 return structs.NewWaitResult(-1, 0, err) 233 } 234 235 return s.pollWait() 236 } 237 238 // pollWait polls on the spawn daemon to determine when it exits. After it 239 // exits, it reads the state file and returns the exit code and possibly an 240 // error. 241 func (s *Spawner) pollWait() *structs.WaitResult { 242 // Stat to check if it is there to avoid a race condition. 243 stat, err := os.Stat(s.StateFile) 244 if err != nil { 245 return structs.NewWaitResult(-1, 0, fmt.Errorf("Failed to Stat exit status file %v: %v", s.StateFile, err)) 246 } 247 248 // If there is data it means that the file has already been written. 249 if stat.Size() > 0 { 250 return s.readExitCode() 251 } 252 253 // Read after the process exits. 254 ticker := time.NewTicker(5 * time.Second) 255 defer ticker.Stop() 256 for range ticker.C { 257 if !s.Alive() { 258 break 259 } 260 } 261 262 return s.readExitCode() 263 } 264 265 // readExitCode parses the state file and returns the exit code of the task. It 266 // returns an error if the file can't be read. 267 func (s *Spawner) readExitCode() *structs.WaitResult { 268 f, err := os.Open(s.StateFile) 269 if err != nil { 270 return structs.NewWaitResult(-1, 0, fmt.Errorf("Failed to open %v to read exit code: %v", s.StateFile, err)) 271 } 272 defer f.Close() 273 274 stat, err := f.Stat() 275 if err != nil { 276 return structs.NewWaitResult(-1, 0, fmt.Errorf("Failed to stat file %v: %v", s.StateFile, err)) 277 } 278 279 if stat.Size() == 0 { 280 return structs.NewWaitResult(-1, 0, fmt.Errorf("Empty state file: %v", s.StateFile)) 281 } 282 283 var exitStatus command.SpawnExitStatus 284 dec := json.NewDecoder(f) 285 if err := dec.Decode(&exitStatus); err != nil { 286 return structs.NewWaitResult(-1, 0, fmt.Errorf("Failed to parse exit status from %v: %v", s.StateFile, err)) 287 } 288 289 return structs.NewWaitResult(exitStatus.ExitCode, 0, nil) 290 } 291 292 // Valid checks that the state of the Spawner is valid and that a subsequent 293 // Wait could be called. This is useful to call when reopening a Spawner 294 // through client restarts. If Valid a nil error is returned. 295 func (s *Spawner) Valid() error { 296 // If the spawner is still alive, then the task is running and we can wait 297 // on it. 298 if s.Alive() { 299 return nil 300 } 301 302 // The task isn't alive so check that there is a valid exit code file. 303 if res := s.readExitCode(); res.Err == nil { 304 return nil 305 } 306 307 return fmt.Errorf("Spawner not alive and exit code not written") 308 }