// Source: github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/client/driver/executor/executor.go

package executor

import (
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/hashicorp/go-multierror"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"

	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/driver/env"
	"github.com/hashicorp/nomad/client/driver/logging"
	cstructs "github.com/hashicorp/nomad/client/driver/structs"
	"github.com/hashicorp/nomad/nomad/structs"
)

// Executor is the interface which allows a driver to launch and supervise
// a process. UniversalExecutor is the implementation defined in this file.
type Executor interface {
	// LaunchCmd starts the given command using the supplied context and
	// returns the state of the launched process.
	LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error)

	// LaunchSyslogServer starts a syslog server and returns its state.
	// NOTE(review): no implementation is visible in this file; confirm the
	// exact behavior against the platform-specific sources.
	LaunchSyslogServer(ctx *ExecutorContext) (*SyslogServerState, error)

	// Wait blocks until the launched process has exited and returns its
	// final state.
	Wait() (*ProcessState, error)

	// ShutDown asks the launched process to stop.
	ShutDown() error

	// Exit kills the launched process and releases the resources held by
	// the executor.
	Exit() error

	// UpdateLogConfig applies a new log configuration to the executor.
	UpdateLogConfig(logConfig *structs.LogConfig) error

	// UpdateTask replaces the task definition tracked by the executor.
	UpdateTask(task *structs.Task) error

	// SyncServices registers the services of the task with consul.
	SyncServices(ctx *ConsulContext) error

	// DeregisterServices shuts down the consul service used by the
	// executor, if one was created.
	DeregisterServices() error

	// Version returns the version of the executor.
	Version() (*ExecutorVersion, error)
}

// ConsulContext holds context to configure the consul client and run checks
type ConsulContext struct {
	// ConsulConfig is the configuration used to create a consul client
	ConsulConfig *consul.ConsulConfig

	// ContainerID is the ID of the container
	ContainerID string

	// TLSCert is the cert which the docker client uses while interacting
	// with the docker daemon over TLS
	TLSCert string

	// TLSCa is the CA which the docker client uses while interacting with
	// the docker daemon over TLS
	TLSCa string

	// TLSKey is the TLS key which the docker client uses while interacting
	// with the docker daemon
	TLSKey string

	// DockerEndpoint is the endpoint of the docker daemon
	DockerEndpoint string
}

// ExecutorContext holds context to configure the command the user wants to
// run and to isolate it
type ExecutorContext struct {
	// TaskEnv holds information about the environment of a Task
	TaskEnv *env.TaskEnvironment

	// AllocDir is the handle to do operations on the alloc dir of
	// the task
	AllocDir *allocdir.AllocDir

	// Task is the task whose executor is being launched
	Task *structs.Task

	// AllocID is the allocation id to which the task belongs
	AllocID string

	// Driver is the name of the driver that invoked the executor
	Driver string

	// PortUpperBound is the upper bound of the ports that we can use to start
	// the syslog server
	PortUpperBound uint

	// PortLowerBound is the lower bound of the ports that we can use to start
	// the syslog server
	PortLowerBound uint
}

// ExecCommand holds the user command, args, and other isolation related
// settings.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args is the args of the command that the user wants to run.
	Args []string

	// FSIsolation determines whether the command would be run in a chroot.
	FSIsolation bool

	// User is the user which the executor uses to run the command.
	User string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool
}

// ProcessState holds information about the state of a user process.
116 type ProcessState struct { 117 Pid int 118 ExitCode int 119 Signal int 120 IsolationConfig *cstructs.IsolationConfig 121 Time time.Time 122 } 123 124 // SyslogServerState holds the address and islation information of a launched 125 // syslog server 126 type SyslogServerState struct { 127 IsolationConfig *cstructs.IsolationConfig 128 Addr string 129 } 130 131 // ExecutorVersion is the version of the executor 132 type ExecutorVersion struct { 133 Version string 134 } 135 136 func (v *ExecutorVersion) GoString() string { 137 return v.Version 138 } 139 140 // UniversalExecutor is an implementation of the Executor which launches and 141 // supervises processes. In addition to process supervision it provides resource 142 // and file system isolation 143 type UniversalExecutor struct { 144 cmd exec.Cmd 145 ctx *ExecutorContext 146 command *ExecCommand 147 148 taskDir string 149 exitState *ProcessState 150 processExited chan interface{} 151 152 lre *logging.FileRotator 153 lro *logging.FileRotator 154 rotatorLock sync.Mutex 155 156 syslogServer *logging.SyslogServer 157 syslogChan chan *logging.SyslogMessage 158 159 groups *cgroupConfig.Cgroup 160 cgPaths map[string]string 161 cgLock sync.Mutex 162 163 consulService *consul.ConsulService 164 consulCtx *ConsulContext 165 logger *log.Logger 166 } 167 168 // NewExecutor returns an Executor 169 func NewExecutor(logger *log.Logger) Executor { 170 return &UniversalExecutor{ 171 logger: logger, 172 processExited: make(chan interface{}), 173 } 174 } 175 176 // Version returns the api version of the executor 177 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 178 return &ExecutorVersion{Version: "1.0.0"}, nil 179 } 180 181 // LaunchCmd launches a process and returns it's state. It also configures an 182 // applies isolation on certain platforms. 
183 func (e *UniversalExecutor) LaunchCmd(command *ExecCommand, ctx *ExecutorContext) (*ProcessState, error) { 184 e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " ")) 185 186 e.ctx = ctx 187 e.command = command 188 189 // configuring the task dir 190 if err := e.configureTaskDir(); err != nil { 191 return nil, err 192 } 193 194 // configuring the chroot, cgroup and enters the plugin process in the 195 // chroot 196 if err := e.configureIsolation(); err != nil { 197 return nil, err 198 } 199 200 // setting the user of the process 201 if command.User != "" { 202 e.logger.Printf("[DEBUG] executor: running command as %s", command.User) 203 if err := e.runAs(command.User); err != nil { 204 return nil, err 205 } 206 } 207 208 // Setup the loggers 209 if err := e.configureLoggers(); err != nil { 210 return nil, err 211 } 212 e.cmd.Stdout = e.lro 213 e.cmd.Stderr = e.lre 214 215 e.ctx.TaskEnv.Build() 216 217 // Look up the binary path and make it executable 218 absPath, err := e.lookupBin(ctx.TaskEnv.ReplaceEnv(command.Cmd)) 219 if err != nil { 220 return nil, err 221 } 222 223 if err := e.makeExecutable(absPath); err != nil { 224 return nil, err 225 } 226 227 // Determine the path to run as it may have to be relative to the chroot. 228 path := absPath 229 if e.command.FSIsolation { 230 rel, err := filepath.Rel(e.taskDir, absPath) 231 if err != nil { 232 return nil, err 233 } 234 path = rel 235 } 236 237 // Set the commands arguments 238 e.cmd.Path = path 239 e.cmd.Args = append([]string{path}, ctx.TaskEnv.ParseAndReplace(command.Args)...) 240 e.cmd.Env = ctx.TaskEnv.EnvList() 241 242 // Apply ourselves into the cgroup. The executor MUST be in the cgroup 243 // before the user task is started, otherwise we are subject to a fork 244 // attack in which a process escapes isolation by immediately forking. 
245 if err := e.applyLimits(os.Getpid()); err != nil { 246 return nil, err 247 } 248 249 // Start the process 250 if err := e.cmd.Start(); err != nil { 251 return nil, err 252 } 253 go e.wait() 254 ic := &cstructs.IsolationConfig{Cgroup: e.groups, CgroupPaths: e.cgPaths} 255 return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil 256 } 257 258 // configureLoggers sets up the standard out/error file rotators 259 func (e *UniversalExecutor) configureLoggers() error { 260 e.rotatorLock.Lock() 261 defer e.rotatorLock.Unlock() 262 263 logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024) 264 if e.lro == nil { 265 lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name), 266 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 267 if err != nil { 268 return err 269 } 270 e.lro = lro 271 } 272 273 if e.lre == nil { 274 lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name), 275 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 276 if err != nil { 277 return err 278 } 279 e.lre = lre 280 } 281 return nil 282 } 283 284 // Wait waits until a process has exited and returns it's exitcode and errors 285 func (e *UniversalExecutor) Wait() (*ProcessState, error) { 286 <-e.processExited 287 return e.exitState, nil 288 } 289 290 // COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist. 
291 // UpdateLogConfig updates the log configuration 292 func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error { 293 e.ctx.Task.LogConfig = logConfig 294 if e.lro == nil { 295 return fmt.Errorf("log rotator for stdout doesn't exist") 296 } 297 e.lro.MaxFiles = logConfig.MaxFiles 298 e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 299 300 if e.lre == nil { 301 return fmt.Errorf("log rotator for stderr doesn't exist") 302 } 303 e.lre.MaxFiles = logConfig.MaxFiles 304 e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 305 return nil 306 } 307 308 func (e *UniversalExecutor) UpdateTask(task *structs.Task) error { 309 e.ctx.Task = task 310 311 // Updating Log Config 312 fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024) 313 e.lro.MaxFiles = task.LogConfig.MaxFiles 314 e.lro.FileSize = fileSize 315 e.lre.MaxFiles = task.LogConfig.MaxFiles 316 e.lre.FileSize = fileSize 317 318 // Re-syncing task with consul service 319 if e.consulService != nil { 320 if err := e.consulService.SyncServices(task.Services); err != nil { 321 return err 322 } 323 } 324 return nil 325 } 326 327 func (e *UniversalExecutor) wait() { 328 defer close(e.processExited) 329 err := e.cmd.Wait() 330 ic := &cstructs.IsolationConfig{Cgroup: e.groups, CgroupPaths: e.cgPaths} 331 if err == nil { 332 e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()} 333 return 334 } 335 exitCode := 1 336 var signal int 337 if exitErr, ok := err.(*exec.ExitError); ok { 338 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 339 exitCode = status.ExitStatus() 340 if status.Signaled() { 341 // bash(1) uses the lower 7 bits of a uint8 342 // to indicate normal program failure (see 343 // <sysexits.h>). If a process terminates due 344 // to a signal, encode the signal number to 345 // indicate which signal caused the process 346 // to terminate. Mirror this exit code 347 // encoding scheme. 
348 const exitSignalBase = 128 349 signal = int(status.Signal()) 350 exitCode = exitSignalBase + signal 351 } 352 } 353 } else { 354 e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err) 355 } 356 357 e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()} 358 } 359 360 var ( 361 // finishedErr is the error message received when trying to kill and already 362 // exited process. 363 finishedErr = "os: process already finished" 364 ) 365 366 // Exit cleans up the alloc directory, destroys cgroups and kills the user 367 // process 368 func (e *UniversalExecutor) Exit() error { 369 var merr multierror.Error 370 if e.syslogServer != nil { 371 e.syslogServer.Shutdown() 372 } 373 e.lre.Close() 374 e.lro.Close() 375 376 // If the executor did not launch a process, return. 377 if e.command == nil { 378 return nil 379 } 380 381 // Prefer killing the process via cgroups. 382 if e.cmd.Process != nil && !e.command.ResourceLimits { 383 proc, err := os.FindProcess(e.cmd.Process.Pid) 384 if err != nil { 385 e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v", 386 e.cmd.Process.Pid, err) 387 } else if err := proc.Kill(); err != nil && err.Error() != finishedErr { 388 merr.Errors = append(merr.Errors, 389 fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err)) 390 } 391 } 392 393 if e.command.ResourceLimits { 394 e.cgLock.Lock() 395 if err := DestroyCgroup(e.groups, e.cgPaths, os.Getpid()); err != nil { 396 merr.Errors = append(merr.Errors, err) 397 } 398 e.cgLock.Unlock() 399 } 400 401 if e.command.FSIsolation { 402 if err := e.removeChrootMounts(); err != nil { 403 merr.Errors = append(merr.Errors, err) 404 } 405 } 406 return merr.ErrorOrNil() 407 } 408 409 // Shutdown sends an interrupt signal to the user process 410 func (e *UniversalExecutor) ShutDown() error { 411 if e.cmd.Process == nil { 412 return fmt.Errorf("executor.shutdown error: no process found") 
413 } 414 proc, err := os.FindProcess(e.cmd.Process.Pid) 415 if err != nil { 416 return fmt.Errorf("executor.shutdown failed to find process: %v", err) 417 } 418 if runtime.GOOS == "windows" { 419 if err := proc.Kill(); err != nil && err.Error() != finishedErr { 420 return err 421 } 422 return nil 423 } 424 if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr { 425 return fmt.Errorf("executor.shutdown error: %v", err) 426 } 427 return nil 428 } 429 430 func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error { 431 e.logger.Printf("[INFO] executor: registering services") 432 e.consulCtx = ctx 433 if e.consulService == nil { 434 cs, err := consul.NewConsulService(ctx.ConsulConfig, e.logger) 435 if err != nil { 436 return err 437 } 438 cs.SetDelegatedChecks(e.createCheckMap(), e.createCheck) 439 cs.SetServiceIdentifier(consul.GenerateServiceIdentifier(e.ctx.AllocID, e.ctx.Task.Name)) 440 cs.SetAddrFinder(e.ctx.Task.FindHostAndPortFor) 441 e.consulService = cs 442 } 443 if e.ctx != nil { 444 e.interpolateServices(e.ctx.Task) 445 } 446 err := e.consulService.SyncServices(e.ctx.Task.Services) 447 go e.consulService.PeriodicSync() 448 return err 449 } 450 451 func (e *UniversalExecutor) DeregisterServices() error { 452 e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service") 453 if e.consulService != nil { 454 return e.consulService.Shutdown() 455 } 456 return nil 457 } 458 459 // configureTaskDir sets the task dir in the executor 460 func (e *UniversalExecutor) configureTaskDir() error { 461 taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name] 462 e.taskDir = taskDir 463 if !ok { 464 return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name) 465 } 466 e.cmd.Dir = taskDir 467 return nil 468 } 469 470 // lookupBin looks for path to the binary to run by looking for the binary in 471 // the following locations, in-order: task/local/, task/, based on host $PATH. 
472 // The return path is absolute. 473 func (e *UniversalExecutor) lookupBin(bin string) (string, error) { 474 // Check in the local directory 475 local := filepath.Join(e.taskDir, allocdir.TaskLocal, bin) 476 if _, err := os.Stat(local); err == nil { 477 return local, nil 478 } 479 480 // Check at the root of the task's directory 481 root := filepath.Join(e.taskDir, bin) 482 if _, err := os.Stat(root); err == nil { 483 return root, nil 484 } 485 486 // Check the $PATH 487 if host, err := exec.LookPath(bin); err == nil { 488 return host, nil 489 } 490 491 return "", fmt.Errorf("binary %q could not be found", bin) 492 } 493 494 // makeExecutable makes the given file executable for root,group,others. 495 func (e *UniversalExecutor) makeExecutable(binPath string) error { 496 if runtime.GOOS == "windows" { 497 return nil 498 } 499 500 fi, err := os.Stat(binPath) 501 if err != nil { 502 if os.IsNotExist(err) { 503 return fmt.Errorf("binary %q does not exist", binPath) 504 } 505 return fmt.Errorf("specified binary is invalid: %v", err) 506 } 507 508 // If it is not executable, make it so. 
509 perm := fi.Mode().Perm() 510 req := os.FileMode(0555) 511 if perm&req != req { 512 if err := os.Chmod(binPath, perm|req); err != nil { 513 return fmt.Errorf("error making %q executable: %s", binPath, err) 514 } 515 } 516 return nil 517 } 518 519 // getFreePort returns a free port ready to be listened on between upper and 520 // lower bounds 521 func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) { 522 if runtime.GOOS == "windows" { 523 return e.listenerTCP(lowerBound, upperBound) 524 } 525 526 return e.listenerUnix() 527 } 528 529 // listenerTCP creates a TCP listener using an unused port between an upper and 530 // lower bound 531 func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) { 532 for i := lowerBound; i <= upperBound; i++ { 533 addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i)) 534 if err != nil { 535 return nil, err 536 } 537 l, err := net.ListenTCP("tcp", addr) 538 if err != nil { 539 continue 540 } 541 return l, nil 542 } 543 return nil, fmt.Errorf("No free port found") 544 } 545 546 // listenerUnix creates a Unix domain socket 547 func (e *UniversalExecutor) listenerUnix() (net.Listener, error) { 548 f, err := ioutil.TempFile("", "plugin") 549 if err != nil { 550 return nil, err 551 } 552 path := f.Name() 553 554 if err := f.Close(); err != nil { 555 return nil, err 556 } 557 if err := os.Remove(path); err != nil { 558 return nil, err 559 } 560 561 return net.Listen("unix", path) 562 } 563 564 // createCheckMap creates a map of checks that the executor will handle on it's 565 // own 566 func (e *UniversalExecutor) createCheckMap() map[string]struct{} { 567 checks := map[string]struct{}{ 568 "script": struct{}{}, 569 } 570 return checks 571 } 572 573 // createCheck creates NomadCheck from a ServiceCheck 574 func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID string) (consul.Check, error) { 575 if check.Type == 
structs.ServiceCheckScript && e.ctx.Driver == "docker" { 576 return &DockerScriptCheck{ 577 id: checkID, 578 interval: check.Interval, 579 timeout: check.Timeout, 580 containerID: e.consulCtx.ContainerID, 581 logger: e.logger, 582 cmd: check.Command, 583 args: check.Args, 584 }, nil 585 } 586 587 if check.Type == structs.ServiceCheckScript && (e.ctx.Driver == "exec" || 588 e.ctx.Driver == "raw_exec" || e.ctx.Driver == "java") { 589 return &ExecScriptCheck{ 590 id: checkID, 591 interval: check.Interval, 592 timeout: check.Timeout, 593 cmd: check.Command, 594 args: check.Args, 595 taskDir: e.taskDir, 596 FSIsolation: e.command.FSIsolation, 597 }, nil 598 599 } 600 return nil, fmt.Errorf("couldn't create check for %v", check.Name) 601 } 602 603 // interpolateServices interpolates tags in a service and checks with values from the 604 // task's environment. 605 func (e *UniversalExecutor) interpolateServices(task *structs.Task) { 606 e.ctx.TaskEnv.Build() 607 for _, service := range task.Services { 608 for _, check := range service.Checks { 609 if check.Type == structs.ServiceCheckScript { 610 check.Name = e.ctx.TaskEnv.ReplaceEnv(check.Name) 611 check.Command = e.ctx.TaskEnv.ReplaceEnv(check.Command) 612 check.Args = e.ctx.TaskEnv.ParseAndReplace(check.Args) 613 check.Path = e.ctx.TaskEnv.ReplaceEnv(check.Path) 614 check.Protocol = e.ctx.TaskEnv.ReplaceEnv(check.Protocol) 615 } 616 } 617 service.Name = e.ctx.TaskEnv.ReplaceEnv(service.Name) 618 service.Tags = e.ctx.TaskEnv.ParseAndReplace(service.Tags) 619 } 620 }