// Source: github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/driver/executor/executor.go

package executor

import (
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/mitchellh/go-ps"
	"github.com/shirou/gopsutil/process"

	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/driver/env"
	"github.com/hashicorp/nomad/client/driver/logging"
	"github.com/hashicorp/nomad/client/stats"
	"github.com/hashicorp/nomad/command/agent/consul"
	shelpers "github.com/hashicorp/nomad/helper/stats"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"

	dstructs "github.com/hashicorp/nomad/client/driver/structs"
	cstructs "github.com/hashicorp/nomad/client/structs"
)

const (
	// pidScanInterval is the interval at which the executor scans the process
	// tree to find the pids that the executor and its child processes have
	// forked.
	pidScanInterval = 5 * time.Second
)

var (
	// ExecutorBasicMeasuredMemStats lists the memory statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}

	// ExecutorBasicMeasuredCpuStats lists the CPU statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)

// Executor is the interface which allows a driver to launch and supervise
// a process.
type Executor interface {
	SetContext(ctx *ExecutorContext) error
	LaunchCmd(command *ExecCommand) (*ProcessState, error)
	LaunchSyslogServer() (*SyslogServerState, error)
	Wait() (*ProcessState, error)
	ShutDown() error
	Exit() error
	UpdateLogConfig(logConfig *structs.LogConfig) error
	UpdateTask(task *structs.Task) error
	SyncServices(ctx *ConsulContext) error
	DeregisterServices() error
	Version() (*ExecutorVersion, error)
	Stats() (*cstructs.TaskResourceUsage, error)
	Signal(s os.Signal) error
}

// ConsulContext holds context to configure the Consul client and run checks.
type ConsulContext struct {
	// ConsulConfig contains the configuration information for talking
	// with this Nomad Agent's Consul Agent.
	ConsulConfig *config.ConsulConfig

	// ContainerID is the ID of the container.
	ContainerID string

	// TLSCert is the cert which the docker client uses while interacting
	// with the docker daemon over TLS.
	TLSCert string

	// TLSCa is the CA which the docker client uses while interacting with
	// the docker daemon over TLS.
	TLSCa string

	// TLSKey is the TLS key which the docker client uses while interacting
	// with the docker daemon.
	TLSKey string

	// DockerEndpoint is the endpoint of the docker daemon.
	DockerEndpoint string
}

// ExecutorContext holds context to configure the command the user wants to
// run and to isolate it.
type ExecutorContext struct {
	// TaskEnv holds information about the environment of a Task.
	TaskEnv *env.TaskEnvironment

	// AllocDir is the handle to do operations on the alloc dir of
	// the task.
	AllocDir *allocdir.AllocDir

	// Task is the task whose executor is being launched.
	Task *structs.Task

	// AllocID is the allocation id to which the task belongs.
	AllocID string

	// ChrootEnv is a mapping of directories on the host OS to attempt to
	// embed inside each task's chroot.
	ChrootEnv map[string]string

	// Driver is the name of the driver that invoked the executor.
	Driver string

	// PortUpperBound is the upper bound of the ports that we can use to start
	// the syslog server.
	PortUpperBound uint

	// PortLowerBound is the lower bound of the ports that we can use to start
	// the syslog server.
	PortLowerBound uint
}

// ExecCommand holds the user command, args, and other isolation related
// settings.
125 type ExecCommand struct { 126 // Cmd is the command that the user wants to run. 127 Cmd string 128 129 // Args is the args of the command that the user wants to run. 130 Args []string 131 132 // FSIsolation determines whether the command would be run in a chroot. 133 FSIsolation bool 134 135 // User is the user which the executor uses to run the command. 136 User string 137 138 // ResourceLimits determines whether resource limits are enforced by the 139 // executor. 140 ResourceLimits bool 141 } 142 143 // ProcessState holds information about the state of a user process. 144 type ProcessState struct { 145 Pid int 146 ExitCode int 147 Signal int 148 IsolationConfig *dstructs.IsolationConfig 149 Time time.Time 150 } 151 152 // nomadPid holds a pid and it's cpu percentage calculator 153 type nomadPid struct { 154 pid int 155 cpuStatsTotal *stats.CpuStats 156 cpuStatsUser *stats.CpuStats 157 cpuStatsSys *stats.CpuStats 158 } 159 160 // SyslogServerState holds the address and islation information of a launched 161 // syslog server 162 type SyslogServerState struct { 163 IsolationConfig *dstructs.IsolationConfig 164 Addr string 165 } 166 167 // ExecutorVersion is the version of the executor 168 type ExecutorVersion struct { 169 Version string 170 } 171 172 func (v *ExecutorVersion) GoString() string { 173 return v.Version 174 } 175 176 // UniversalExecutor is an implementation of the Executor which launches and 177 // supervises processes. 
In addition to process supervision it provides resource 178 // and file system isolation 179 type UniversalExecutor struct { 180 cmd exec.Cmd 181 ctx *ExecutorContext 182 command *ExecCommand 183 184 pids map[int]*nomadPid 185 pidLock sync.RWMutex 186 taskDir string 187 exitState *ProcessState 188 processExited chan interface{} 189 fsIsolationEnforced bool 190 191 lre *logging.FileRotator 192 lro *logging.FileRotator 193 rotatorLock sync.Mutex 194 195 shutdownCh chan struct{} 196 197 syslogServer *logging.SyslogServer 198 syslogChan chan *logging.SyslogMessage 199 200 resConCtx resourceContainerContext 201 202 consulSyncer *consul.Syncer 203 consulCtx *ConsulContext 204 totalCpuStats *stats.CpuStats 205 userCpuStats *stats.CpuStats 206 systemCpuStats *stats.CpuStats 207 logger *log.Logger 208 } 209 210 // NewExecutor returns an Executor 211 func NewExecutor(logger *log.Logger) Executor { 212 if err := shelpers.Init(); err != nil { 213 logger.Printf("[FATAL] executor: unable to initialize stats: %v", err) 214 return nil 215 } 216 217 exec := &UniversalExecutor{ 218 logger: logger, 219 processExited: make(chan interface{}), 220 totalCpuStats: stats.NewCpuStats(), 221 userCpuStats: stats.NewCpuStats(), 222 systemCpuStats: stats.NewCpuStats(), 223 pids: make(map[int]*nomadPid), 224 } 225 226 return exec 227 } 228 229 // Version returns the api version of the executor 230 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 231 return &ExecutorVersion{Version: "1.0.0"}, nil 232 } 233 234 // SetContext is used to set the executors context and should be the first call 235 // after launching the executor. 236 func (e *UniversalExecutor) SetContext(ctx *ExecutorContext) error { 237 e.ctx = ctx 238 return nil 239 } 240 241 // LaunchCmd launches a process and returns it's state. It also configures an 242 // applies isolation on certain platforms. 
243 func (e *UniversalExecutor) LaunchCmd(command *ExecCommand) (*ProcessState, error) { 244 e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " ")) 245 246 // Ensure the context has been set first 247 if e.ctx == nil { 248 return nil, fmt.Errorf("SetContext must be called before launching a command") 249 } 250 251 e.command = command 252 253 // setting the user of the process 254 if command.User != "" { 255 e.logger.Printf("[DEBUG] executor: running command as %s", command.User) 256 if err := e.runAs(command.User); err != nil { 257 return nil, err 258 } 259 } 260 261 // configuring the task dir 262 if err := e.configureTaskDir(); err != nil { 263 return nil, err 264 } 265 266 e.ctx.TaskEnv.Build() 267 // configuring the chroot, resource container, and start the plugin 268 // process in the chroot. 269 if err := e.configureIsolation(); err != nil { 270 return nil, err 271 } 272 // Apply ourselves into the resource container. The executor MUST be in 273 // the resource container before the user task is started, otherwise we 274 // are subject to a fork attack in which a process escapes isolation by 275 // immediately forking. 276 if err := e.applyLimits(os.Getpid()); err != nil { 277 return nil, err 278 } 279 280 // Setup the loggers 281 if err := e.configureLoggers(); err != nil { 282 return nil, err 283 } 284 e.cmd.Stdout = e.lro 285 e.cmd.Stderr = e.lre 286 287 // Look up the binary path and make it executable 288 absPath, err := e.lookupBin(e.ctx.TaskEnv.ReplaceEnv(command.Cmd)) 289 if err != nil { 290 return nil, err 291 } 292 293 if err := e.makeExecutable(absPath); err != nil { 294 return nil, err 295 } 296 297 path := absPath 298 299 // Determine the path to run as it may have to be relative to the chroot. 
300 if e.fsIsolationEnforced { 301 rel, err := filepath.Rel(e.taskDir, path) 302 if err != nil { 303 return nil, err 304 } 305 path = rel 306 } 307 308 // Set the commands arguments 309 e.cmd.Path = path 310 e.cmd.Args = append([]string{e.cmd.Path}, e.ctx.TaskEnv.ParseAndReplace(command.Args)...) 311 e.cmd.Env = e.ctx.TaskEnv.EnvList() 312 313 // Start the process 314 if err := e.cmd.Start(); err != nil { 315 return nil, err 316 } 317 go e.collectPids() 318 go e.wait() 319 ic := e.resConCtx.getIsolationConfig() 320 return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil 321 } 322 323 // configureLoggers sets up the standard out/error file rotators 324 func (e *UniversalExecutor) configureLoggers() error { 325 e.rotatorLock.Lock() 326 defer e.rotatorLock.Unlock() 327 328 logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024) 329 if e.lro == nil { 330 lro, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stdout", e.ctx.Task.Name), 331 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 332 if err != nil { 333 return err 334 } 335 e.lro = lro 336 } 337 338 if e.lre == nil { 339 lre, err := logging.NewFileRotator(e.ctx.AllocDir.LogDir(), fmt.Sprintf("%v.stderr", e.ctx.Task.Name), 340 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 341 if err != nil { 342 return err 343 } 344 e.lre = lre 345 } 346 return nil 347 } 348 349 // Wait waits until a process has exited and returns it's exitcode and errors 350 func (e *UniversalExecutor) Wait() (*ProcessState, error) { 351 <-e.processExited 352 return e.exitState, nil 353 } 354 355 // COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist. 
356 // UpdateLogConfig updates the log configuration 357 func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error { 358 e.ctx.Task.LogConfig = logConfig 359 if e.lro == nil { 360 return fmt.Errorf("log rotator for stdout doesn't exist") 361 } 362 e.lro.MaxFiles = logConfig.MaxFiles 363 e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 364 365 if e.lre == nil { 366 return fmt.Errorf("log rotator for stderr doesn't exist") 367 } 368 e.lre.MaxFiles = logConfig.MaxFiles 369 e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 370 return nil 371 } 372 373 func (e *UniversalExecutor) UpdateTask(task *structs.Task) error { 374 e.ctx.Task = task 375 376 // Updating Log Config 377 e.rotatorLock.Lock() 378 if e.lro != nil && e.lre != nil { 379 fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024) 380 e.lro.MaxFiles = task.LogConfig.MaxFiles 381 e.lro.FileSize = fileSize 382 e.lre.MaxFiles = task.LogConfig.MaxFiles 383 e.lre.FileSize = fileSize 384 } 385 e.rotatorLock.Unlock() 386 387 // Re-syncing task with Consul agent 388 if e.consulSyncer != nil { 389 e.interpolateServices(e.ctx.Task) 390 domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name) 391 serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services) 392 e.consulSyncer.SetServices(domain, serviceMap) 393 } 394 return nil 395 } 396 397 // generateServiceKeys takes a list of interpolated Nomad Services and returns a map 398 // of ServiceKeys to Nomad Services. 
399 func generateServiceKeys(allocID string, services []*structs.Service) map[consul.ServiceKey]*structs.Service { 400 keys := make(map[consul.ServiceKey]*structs.Service, len(services)) 401 for _, service := range services { 402 key := consul.GenerateServiceKey(service) 403 keys[key] = service 404 } 405 return keys 406 } 407 408 func (e *UniversalExecutor) wait() { 409 defer close(e.processExited) 410 err := e.cmd.Wait() 411 ic := e.resConCtx.getIsolationConfig() 412 if err == nil { 413 e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()} 414 return 415 } 416 417 e.lre.Close() 418 e.lro.Close() 419 420 exitCode := 1 421 var signal int 422 if exitErr, ok := err.(*exec.ExitError); ok { 423 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 424 exitCode = status.ExitStatus() 425 if status.Signaled() { 426 // bash(1) uses the lower 7 bits of a uint8 427 // to indicate normal program failure (see 428 // <sysexits.h>). If a process terminates due 429 // to a signal, encode the signal number to 430 // indicate which signal caused the process 431 // to terminate. Mirror this exit code 432 // encoding scheme. 433 const exitSignalBase = 128 434 signal = int(status.Signal()) 435 exitCode = exitSignalBase + signal 436 } 437 } 438 } else { 439 e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err) 440 } 441 442 e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()} 443 } 444 445 var ( 446 // finishedErr is the error message received when trying to kill and already 447 // exited process. 448 finishedErr = "os: process already finished" 449 ) 450 451 // ClientCleanup is the cleanup routine that a Nomad Client uses to remove the 452 // reminants of a child UniversalExecutor. 
453 func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error { 454 return clientCleanup(ic, pid) 455 } 456 457 // Exit cleans up the alloc directory, destroys resource container and kills the 458 // user process 459 func (e *UniversalExecutor) Exit() error { 460 var merr multierror.Error 461 if e.syslogServer != nil { 462 e.syslogServer.Shutdown() 463 } 464 465 if e.lre != nil { 466 e.lre.Close() 467 } 468 469 if e.lro != nil { 470 e.lro.Close() 471 } 472 473 if e.consulSyncer != nil { 474 e.consulSyncer.Shutdown() 475 } 476 477 // If the executor did not launch a process, return. 478 if e.command == nil { 479 return nil 480 } 481 482 // Prefer killing the process via the resource container. 483 if e.cmd.Process != nil && !e.command.ResourceLimits { 484 proc, err := os.FindProcess(e.cmd.Process.Pid) 485 if err != nil { 486 e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v", 487 e.cmd.Process.Pid, err) 488 } else if err := proc.Kill(); err != nil && err.Error() != finishedErr { 489 merr.Errors = append(merr.Errors, 490 fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err)) 491 } 492 } 493 494 if e.command.ResourceLimits { 495 if err := e.resConCtx.executorCleanup(); err != nil { 496 merr.Errors = append(merr.Errors, err) 497 } 498 } 499 500 if e.command.FSIsolation { 501 if err := e.removeChrootMounts(); err != nil { 502 merr.Errors = append(merr.Errors, err) 503 } 504 } 505 return merr.ErrorOrNil() 506 } 507 508 // Shutdown sends an interrupt signal to the user process 509 func (e *UniversalExecutor) ShutDown() error { 510 if e.cmd.Process == nil { 511 return fmt.Errorf("executor.shutdown error: no process found") 512 } 513 proc, err := os.FindProcess(e.cmd.Process.Pid) 514 if err != nil { 515 return fmt.Errorf("executor.shutdown failed to find process: %v", err) 516 } 517 if runtime.GOOS == "windows" { 518 if err := proc.Kill(); err != nil && err.Error() != finishedErr { 519 return err 520 } 521 return nil 
522 } 523 if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr { 524 return fmt.Errorf("executor.shutdown error: %v", err) 525 } 526 return nil 527 } 528 529 // SyncServices syncs the services of the task that the executor is running with 530 // Consul 531 func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error { 532 e.logger.Printf("[INFO] executor: registering services") 533 e.consulCtx = ctx 534 if e.consulSyncer == nil { 535 cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger) 536 if err != nil { 537 return err 538 } 539 e.consulSyncer = cs 540 go e.consulSyncer.Run() 541 } 542 e.interpolateServices(e.ctx.Task) 543 e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck) 544 e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor) 545 domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name) 546 serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services) 547 e.consulSyncer.SetServices(domain, serviceMap) 548 return nil 549 } 550 551 // DeregisterServices removes the services of the task that the executor is 552 // running from Consul 553 func (e *UniversalExecutor) DeregisterServices() error { 554 e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service") 555 if e.consulSyncer != nil { 556 return e.consulSyncer.Shutdown() 557 } 558 return nil 559 } 560 561 // pidStats returns the resource usage stats per pid 562 func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) { 563 stats := make(map[string]*cstructs.ResourceUsage) 564 e.pidLock.RLock() 565 pids := make(map[int]*nomadPid, len(e.pids)) 566 for k, v := range e.pids { 567 pids[k] = v 568 } 569 e.pidLock.RUnlock() 570 for pid, np := range pids { 571 p, err := process.NewProcess(int32(pid)) 572 if err != nil { 573 e.logger.Printf("[DEBUG] executor: unable to create new process with pid: %v", pid) 574 continue 575 } 576 ms := &cstructs.MemoryStats{} 577 if memInfo, 
err := p.MemoryInfo(); err == nil { 578 ms.RSS = memInfo.RSS 579 ms.Swap = memInfo.Swap 580 ms.Measured = ExecutorBasicMeasuredMemStats 581 } 582 583 cs := &cstructs.CpuStats{} 584 if cpuStats, err := p.Times(); err == nil { 585 cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second)) 586 cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second)) 587 cs.Measured = ExecutorBasicMeasuredCpuStats 588 589 // calculate cpu usage percent 590 cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second)) 591 } 592 stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs} 593 } 594 595 return stats, nil 596 } 597 598 // configureTaskDir sets the task dir in the executor 599 func (e *UniversalExecutor) configureTaskDir() error { 600 taskDir, ok := e.ctx.AllocDir.TaskDirs[e.ctx.Task.Name] 601 e.taskDir = taskDir 602 if !ok { 603 return fmt.Errorf("couldn't find task directory for task %v", e.ctx.Task.Name) 604 } 605 e.cmd.Dir = taskDir 606 return nil 607 } 608 609 // lookupBin looks for path to the binary to run by looking for the binary in 610 // the following locations, in-order: task/local/, task/, based on host $PATH. 611 // The return path is absolute. 612 func (e *UniversalExecutor) lookupBin(bin string) (string, error) { 613 // Check in the local directory 614 local := filepath.Join(e.taskDir, allocdir.TaskLocal, bin) 615 if _, err := os.Stat(local); err == nil { 616 return local, nil 617 } 618 619 // Check at the root of the task's directory 620 root := filepath.Join(e.taskDir, bin) 621 if _, err := os.Stat(root); err == nil { 622 return root, nil 623 } 624 625 // Check the $PATH 626 if host, err := exec.LookPath(bin); err == nil { 627 return host, nil 628 } 629 630 return "", fmt.Errorf("binary %q could not be found", bin) 631 } 632 633 // makeExecutable makes the given file executable for root,group,others. 
634 func (e *UniversalExecutor) makeExecutable(binPath string) error { 635 if runtime.GOOS == "windows" { 636 return nil 637 } 638 639 fi, err := os.Stat(binPath) 640 if err != nil { 641 if os.IsNotExist(err) { 642 return fmt.Errorf("binary %q does not exist", binPath) 643 } 644 return fmt.Errorf("specified binary is invalid: %v", err) 645 } 646 647 // If it is not executable, make it so. 648 perm := fi.Mode().Perm() 649 req := os.FileMode(0555) 650 if perm&req != req { 651 if err := os.Chmod(binPath, perm|req); err != nil { 652 return fmt.Errorf("error making %q executable: %s", binPath, err) 653 } 654 } 655 return nil 656 } 657 658 // getFreePort returns a free port ready to be listened on between upper and 659 // lower bounds 660 func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) { 661 if runtime.GOOS == "windows" { 662 return e.listenerTCP(lowerBound, upperBound) 663 } 664 665 return e.listenerUnix() 666 } 667 668 // listenerTCP creates a TCP listener using an unused port between an upper and 669 // lower bound 670 func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) { 671 for i := lowerBound; i <= upperBound; i++ { 672 addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i)) 673 if err != nil { 674 return nil, err 675 } 676 l, err := net.ListenTCP("tcp", addr) 677 if err != nil { 678 continue 679 } 680 return l, nil 681 } 682 return nil, fmt.Errorf("No free port found") 683 } 684 685 // listenerUnix creates a Unix domain socket 686 func (e *UniversalExecutor) listenerUnix() (net.Listener, error) { 687 f, err := ioutil.TempFile("", "plugin") 688 if err != nil { 689 return nil, err 690 } 691 path := f.Name() 692 693 if err := f.Close(); err != nil { 694 return nil, err 695 } 696 if err := os.Remove(path); err != nil { 697 return nil, err 698 } 699 700 return net.Listen("unix", path) 701 } 702 703 // createCheckMap creates a map of checks that the executor will 
handle on it's 704 // own 705 func (e *UniversalExecutor) createCheckMap() map[string]struct{} { 706 checks := map[string]struct{}{ 707 "script": struct{}{}, 708 } 709 return checks 710 } 711 712 // createCheck creates NomadCheck from a ServiceCheck 713 func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID string) (consul.Check, error) { 714 if check.Type == structs.ServiceCheckScript && e.ctx.Driver == "docker" { 715 return &DockerScriptCheck{ 716 id: checkID, 717 interval: check.Interval, 718 timeout: check.Timeout, 719 containerID: e.consulCtx.ContainerID, 720 logger: e.logger, 721 cmd: check.Command, 722 args: check.Args, 723 }, nil 724 } 725 726 if check.Type == structs.ServiceCheckScript && (e.ctx.Driver == "exec" || 727 e.ctx.Driver == "raw_exec" || e.ctx.Driver == "java") { 728 return &ExecScriptCheck{ 729 id: checkID, 730 interval: check.Interval, 731 timeout: check.Timeout, 732 cmd: check.Command, 733 args: check.Args, 734 taskDir: e.taskDir, 735 FSIsolation: e.command.FSIsolation, 736 }, nil 737 738 } 739 return nil, fmt.Errorf("couldn't create check for %v", check.Name) 740 } 741 742 // interpolateServices interpolates tags in a service and checks with values from the 743 // task's environment. 
744 func (e *UniversalExecutor) interpolateServices(task *structs.Task) { 745 e.ctx.TaskEnv.Build() 746 for _, service := range task.Services { 747 for _, check := range service.Checks { 748 check.Name = e.ctx.TaskEnv.ReplaceEnv(check.Name) 749 check.Type = e.ctx.TaskEnv.ReplaceEnv(check.Type) 750 check.Command = e.ctx.TaskEnv.ReplaceEnv(check.Command) 751 check.Args = e.ctx.TaskEnv.ParseAndReplace(check.Args) 752 check.Path = e.ctx.TaskEnv.ReplaceEnv(check.Path) 753 check.Protocol = e.ctx.TaskEnv.ReplaceEnv(check.Protocol) 754 check.PortLabel = e.ctx.TaskEnv.ReplaceEnv(check.PortLabel) 755 check.InitialStatus = e.ctx.TaskEnv.ReplaceEnv(check.InitialStatus) 756 } 757 service.Name = e.ctx.TaskEnv.ReplaceEnv(service.Name) 758 service.PortLabel = e.ctx.TaskEnv.ReplaceEnv(service.PortLabel) 759 service.Tags = e.ctx.TaskEnv.ParseAndReplace(service.Tags) 760 } 761 } 762 763 // collectPids collects the pids of the child processes that the executor is 764 // running every 5 seconds 765 func (e *UniversalExecutor) collectPids() { 766 // Fire the timer right away when the executor starts from there on the pids 767 // are collected every scan interval 768 timer := time.NewTimer(0) 769 defer timer.Stop() 770 for { 771 select { 772 case <-timer.C: 773 pids, err := e.getAllPids() 774 if err != nil { 775 e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err) 776 } 777 e.pidLock.Lock() 778 779 // Adding pids which are not being tracked 780 for pid, np := range pids { 781 if _, ok := e.pids[pid]; !ok { 782 e.pids[pid] = np 783 } 784 } 785 // Removing pids which are no longer present 786 for pid := range e.pids { 787 if _, ok := pids[pid]; !ok { 788 delete(e.pids, pid) 789 } 790 } 791 e.pidLock.Unlock() 792 timer.Reset(pidScanInterval) 793 case <-e.processExited: 794 return 795 } 796 } 797 } 798 799 // scanPids scans all the pids on the machine running the current executor and 800 // returns the child processes of the executor. 
801 func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) { 802 processFamily := make(map[int]struct{}) 803 processFamily[parentPid] = struct{}{} 804 805 // A mapping of pids to their parent pids. It is used to build the process 806 // tree of the executing task 807 pidsRemaining := make(map[int]int, len(allPids)) 808 for _, pid := range allPids { 809 pidsRemaining[pid.Pid()] = pid.PPid() 810 } 811 812 for { 813 // flag to indicate if we have found a match 814 foundNewPid := false 815 816 for pid, ppid := range pidsRemaining { 817 _, childPid := processFamily[ppid] 818 819 // checking if the pid is a child of any of the parents 820 if childPid { 821 processFamily[pid] = struct{}{} 822 delete(pidsRemaining, pid) 823 foundNewPid = true 824 } 825 } 826 827 // not scanning anymore if we couldn't find a single match 828 if !foundNewPid { 829 break 830 } 831 } 832 833 res := make(map[int]*nomadPid) 834 for pid := range processFamily { 835 np := nomadPid{ 836 pid: pid, 837 cpuStatsTotal: stats.NewCpuStats(), 838 cpuStatsUser: stats.NewCpuStats(), 839 cpuStatsSys: stats.NewCpuStats(), 840 } 841 res[pid] = &np 842 } 843 return res, nil 844 } 845 846 // aggregatedResourceUsage aggregates the resource usage of all the pids and 847 // returns a TaskResourceUsage data point 848 func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage { 849 ts := time.Now().UTC().UnixNano() 850 var ( 851 systemModeCPU, userModeCPU, percent float64 852 totalRSS, totalSwap uint64 853 ) 854 855 for _, pidStat := range pidStats { 856 systemModeCPU += pidStat.CpuStats.SystemMode 857 userModeCPU += pidStat.CpuStats.UserMode 858 percent += pidStat.CpuStats.Percent 859 860 totalRSS += pidStat.MemoryStats.RSS 861 totalSwap += pidStat.MemoryStats.Swap 862 } 863 864 totalCPU := &cstructs.CpuStats{ 865 SystemMode: systemModeCPU, 866 UserMode: userModeCPU, 867 Percent: percent, 868 Measured: 
ExecutorBasicMeasuredCpuStats, 869 TotalTicks: e.systemCpuStats.TicksConsumed(percent), 870 } 871 872 totalMemory := &cstructs.MemoryStats{ 873 RSS: totalRSS, 874 Swap: totalSwap, 875 Measured: ExecutorBasicMeasuredMemStats, 876 } 877 878 resourceUsage := cstructs.ResourceUsage{ 879 MemoryStats: totalMemory, 880 CpuStats: totalCPU, 881 } 882 return &cstructs.TaskResourceUsage{ 883 ResourceUsage: &resourceUsage, 884 Timestamp: ts, 885 Pids: pidStats, 886 } 887 } 888 889 // Signal sends the passed signal to the task 890 func (e *UniversalExecutor) Signal(s os.Signal) error { 891 if e.cmd.Process == nil { 892 return fmt.Errorf("Task not yet run") 893 } 894 895 e.logger.Printf("[DEBUG] executor: sending signal %s", s) 896 err := e.cmd.Process.Signal(s) 897 if err != nil { 898 e.logger.Printf("[ERR] executor: sending signal %s failed: %v", err) 899 return err 900 } 901 902 return nil 903 }