github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/client/driver/executor/executor.go (about) 1 package executor 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "log" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "runtime" 12 "strconv" 13 "strings" 14 "sync" 15 "syscall" 16 "time" 17 18 "github.com/hashicorp/go-multierror" 19 "github.com/mitchellh/go-ps" 20 "github.com/shirou/gopsutil/process" 21 22 "github.com/ncodes/nomad/client/allocdir" 23 "github.com/ncodes/nomad/client/driver/env" 24 "github.com/ncodes/nomad/client/driver/logging" 25 "github.com/ncodes/nomad/client/stats" 26 "github.com/ncodes/nomad/command/agent/consul" 27 shelpers "github.com/ncodes/nomad/helper/stats" 28 "github.com/ncodes/nomad/nomad/structs" 29 "github.com/ncodes/nomad/nomad/structs/config" 30 31 dstructs "github.com/ncodes/nomad/client/driver/structs" 32 cstructs "github.com/ncodes/nomad/client/structs" 33 ) 34 35 const ( 36 // pidScanInterval is the interval at which the executor scans the process 37 // tree for finding out the pids that the executor and it's child processes 38 // have forked 39 pidScanInterval = 5 * time.Second 40 ) 41 42 var ( 43 // The statistics the basic executor exposes 44 ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"} 45 ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"} 46 ) 47 48 // Executor is the interface which allows a driver to launch and supervise 49 // a process 50 type Executor interface { 51 SetContext(ctx *ExecutorContext) error 52 LaunchCmd(command *ExecCommand) (*ProcessState, error) 53 LaunchSyslogServer() (*SyslogServerState, error) 54 Wait() (*ProcessState, error) 55 ShutDown() error 56 Exit() error 57 UpdateLogConfig(logConfig *structs.LogConfig) error 58 UpdateTask(task *structs.Task) error 59 SyncServices(ctx *ConsulContext) error 60 DeregisterServices() error 61 Version() (*ExecutorVersion, error) 62 Stats() (*cstructs.TaskResourceUsage, error) 63 Signal(s os.Signal) error 64 } 65 66 // ConsulContext holds context to configure the Consul client and run checks 67 type ConsulContext struct { 68 // ConsulConfig contains the configuration information for talking 69 // with this Nomad Agent's Consul Agent. 70 ConsulConfig *config.ConsulConfig 71 72 // ContainerID is the ID of the container 73 ContainerID string 74 75 // TLSCert is the cert which docker client uses while interactng with the docker 76 // daemon over TLS 77 TLSCert string 78 79 // TLSCa is the CA which the docker client uses while interacting with the docker 80 // daeemon over TLS 81 TLSCa string 82 83 // TLSKey is the TLS key which the docker client uses while interacting with 84 // the docker daemon 85 TLSKey string 86 87 // DockerEndpoint is the endpoint of the docker daemon 88 DockerEndpoint string 89 } 90 91 // ExecutorContext holds context to configure the command user 92 // wants to run and isolate it 93 type ExecutorContext struct { 94 // TaskEnv holds information about the environment of a Task 95 TaskEnv *env.TaskEnvironment 96 97 // Task is the task whose executor is being launched 98 Task *structs.Task 99 100 // AllocID is the allocation id to which the task belongs 101 AllocID string 102 103 // TaskDir is the host path to the task's root 104 TaskDir string 105 106 // LogDir is the host path where logs should be written 107 LogDir string 108 109 // Driver is the name of the driver that invoked the executor 110 Driver string 111 112 // PortUpperBound is the upper bound of the ports that we can use to start 113 // the syslog server 114 PortUpperBound uint 115 116 // PortLowerBound is the lower bound of the ports that we can use to start 117 // the syslog server 118 PortLowerBound uint 119 } 120 121 // ExecCommand holds the user command, args, and other isolation related 122 // settings. 123 type ExecCommand struct { 124 // Cmd is the command that the user wants to run. 125 Cmd string 126 127 // Args is the args of the command that the user wants to run. 128 Args []string 129 130 // FSIsolation determines whether the command would be run in a chroot. 131 FSIsolation bool 132 133 // User is the user which the executor uses to run the command. 134 User string 135 136 // ResourceLimits determines whether resource limits are enforced by the 137 // executor. 138 ResourceLimits bool 139 } 140 141 // ProcessState holds information about the state of a user process. 142 type ProcessState struct { 143 Pid int 144 ExitCode int 145 Signal int 146 IsolationConfig *dstructs.IsolationConfig 147 Time time.Time 148 } 149 150 // nomadPid holds a pid and it's cpu percentage calculator 151 type nomadPid struct { 152 pid int 153 cpuStatsTotal *stats.CpuStats 154 cpuStatsUser *stats.CpuStats 155 cpuStatsSys *stats.CpuStats 156 } 157 158 // SyslogServerState holds the address and islation information of a launched 159 // syslog server 160 type SyslogServerState struct { 161 IsolationConfig *dstructs.IsolationConfig 162 Addr string 163 } 164 165 // ExecutorVersion is the version of the executor 166 type ExecutorVersion struct { 167 Version string 168 } 169 170 func (v *ExecutorVersion) GoString() string { 171 return v.Version 172 } 173 174 // UniversalExecutor is an implementation of the Executor which launches and 175 // supervises processes. In addition to process supervision it provides resource 176 // and file system isolation 177 type UniversalExecutor struct { 178 cmd exec.Cmd 179 ctx *ExecutorContext 180 command *ExecCommand 181 182 pids map[int]*nomadPid 183 pidLock sync.RWMutex 184 exitState *ProcessState 185 processExited chan interface{} 186 fsIsolationEnforced bool 187 188 lre *logging.FileRotator 189 lro *logging.FileRotator 190 rotatorLock sync.Mutex 191 192 shutdownCh chan struct{} 193 194 syslogServer *logging.SyslogServer 195 syslogChan chan *logging.SyslogMessage 196 197 resConCtx resourceContainerContext 198 199 consulSyncer *consul.Syncer 200 consulCtx *ConsulContext 201 totalCpuStats *stats.CpuStats 202 userCpuStats *stats.CpuStats 203 systemCpuStats *stats.CpuStats 204 logger *log.Logger 205 } 206 207 // NewExecutor returns an Executor 208 func NewExecutor(logger *log.Logger) Executor { 209 if err := shelpers.Init(); err != nil { 210 logger.Printf("[ERR] executor: unable to initialize stats: %v", err) 211 } 212 213 exec := &UniversalExecutor{ 214 logger: logger, 215 processExited: make(chan interface{}), 216 totalCpuStats: stats.NewCpuStats(), 217 userCpuStats: stats.NewCpuStats(), 218 systemCpuStats: stats.NewCpuStats(), 219 pids: make(map[int]*nomadPid), 220 } 221 222 return exec 223 } 224 225 // Version returns the api version of the executor 226 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 227 return &ExecutorVersion{Version: "1.0.0"}, nil 228 } 229 230 // SetContext is used to set the executors context and should be the first call 231 // after launching the executor. 232 func (e *UniversalExecutor) SetContext(ctx *ExecutorContext) error { 233 e.ctx = ctx 234 return nil 235 } 236 237 // LaunchCmd launches a process and returns it's state. It also configures an 238 // applies isolation on certain platforms. 239 func (e *UniversalExecutor) LaunchCmd(command *ExecCommand) (*ProcessState, error) { 240 e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " ")) 241 242 // Ensure the context has been set first 243 if e.ctx == nil { 244 return nil, fmt.Errorf("SetContext must be called before launching a command") 245 } 246 247 e.command = command 248 249 // setting the user of the process 250 if command.User != "" { 251 e.logger.Printf("[DEBUG] executor: running command as %s", command.User) 252 if err := e.runAs(command.User); err != nil { 253 return nil, err 254 } 255 } 256 257 // set the task dir as the working directory for the command 258 e.cmd.Dir = e.ctx.TaskDir 259 260 e.ctx.TaskEnv.Build() 261 // configuring the chroot, resource container, and start the plugin 262 // process in the chroot. 263 if err := e.configureIsolation(); err != nil { 264 return nil, err 265 } 266 // Apply ourselves into the resource container. The executor MUST be in 267 // the resource container before the user task is started, otherwise we 268 // are subject to a fork attack in which a process escapes isolation by 269 // immediately forking. 270 if err := e.applyLimits(os.Getpid()); err != nil { 271 return nil, err 272 } 273 274 // Setup the loggers 275 if err := e.configureLoggers(); err != nil { 276 return nil, err 277 } 278 e.cmd.Stdout = e.lro 279 e.cmd.Stderr = e.lre 280 281 // Look up the binary path and make it executable 282 absPath, err := e.lookupBin(e.ctx.TaskEnv.ReplaceEnv(command.Cmd)) 283 if err != nil { 284 return nil, err 285 } 286 287 if err := e.makeExecutable(absPath); err != nil { 288 return nil, err 289 } 290 291 path := absPath 292 293 // Determine the path to run as it may have to be relative to the chroot. 294 if e.fsIsolationEnforced { 295 rel, err := filepath.Rel(e.ctx.TaskDir, path) 296 if err != nil { 297 return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", e.ctx.TaskDir, path, err) 298 } 299 path = rel 300 } 301 302 // Set the commands arguments 303 e.cmd.Path = path 304 e.cmd.Args = append([]string{e.cmd.Path}, e.ctx.TaskEnv.ParseAndReplace(command.Args)...) 305 e.cmd.Env = e.ctx.TaskEnv.EnvList() 306 307 // Start the process 308 if err := e.cmd.Start(); err != nil { 309 return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.cmd.Args, err) 310 } 311 go e.collectPids() 312 go e.wait() 313 ic := e.resConCtx.getIsolationConfig() 314 return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil 315 } 316 317 // configureLoggers sets up the standard out/error file rotators 318 func (e *UniversalExecutor) configureLoggers() error { 319 e.rotatorLock.Lock() 320 defer e.rotatorLock.Unlock() 321 322 logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024) 323 if e.lro == nil { 324 lro, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stdout", e.ctx.Task.Name), 325 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 326 if err != nil { 327 return fmt.Errorf("error creating new stdout log file for %q: %v", e.ctx.Task.Name, err) 328 } 329 e.lro = lro 330 } 331 332 if e.lre == nil { 333 lre, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stderr", e.ctx.Task.Name), 334 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 335 if err != nil { 336 return fmt.Errorf("error creating new stderr log file for %q: %v", e.ctx.Task.Name, err) 337 } 338 e.lre = lre 339 } 340 return nil 341 } 342 343 // Wait waits until a process has exited and returns it's exitcode and errors 344 func (e *UniversalExecutor) Wait() (*ProcessState, error) { 345 <-e.processExited 346 return e.exitState, nil 347 } 348 349 // COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist. 350 // UpdateLogConfig updates the log configuration 351 func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error { 352 e.ctx.Task.LogConfig = logConfig 353 if e.lro == nil { 354 return fmt.Errorf("log rotator for stdout doesn't exist") 355 } 356 e.lro.MaxFiles = logConfig.MaxFiles 357 e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 358 359 if e.lre == nil { 360 return fmt.Errorf("log rotator for stderr doesn't exist") 361 } 362 e.lre.MaxFiles = logConfig.MaxFiles 363 e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 364 return nil 365 } 366 367 func (e *UniversalExecutor) UpdateTask(task *structs.Task) error { 368 e.ctx.Task = task 369 370 // Updating Log Config 371 e.rotatorLock.Lock() 372 if e.lro != nil && e.lre != nil { 373 fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024) 374 e.lro.MaxFiles = task.LogConfig.MaxFiles 375 e.lro.FileSize = fileSize 376 e.lre.MaxFiles = task.LogConfig.MaxFiles 377 e.lre.FileSize = fileSize 378 } 379 e.rotatorLock.Unlock() 380 381 // Re-syncing task with Consul agent 382 if e.consulSyncer != nil { 383 e.interpolateServices(e.ctx.Task) 384 domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name) 385 serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services) 386 e.consulSyncer.SetServices(domain, serviceMap) 387 } 388 return nil 389 } 390 391 // generateServiceKeys takes a list of interpolated Nomad Services and returns a map 392 // of ServiceKeys to Nomad Services. 393 func generateServiceKeys(allocID string, services []*structs.Service) map[consul.ServiceKey]*structs.Service { 394 keys := make(map[consul.ServiceKey]*structs.Service, len(services)) 395 for _, service := range services { 396 key := consul.GenerateServiceKey(service) 397 keys[key] = service 398 } 399 return keys 400 } 401 402 func (e *UniversalExecutor) wait() { 403 defer close(e.processExited) 404 err := e.cmd.Wait() 405 ic := e.resConCtx.getIsolationConfig() 406 if err == nil { 407 e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()} 408 return 409 } 410 411 e.lre.Close() 412 e.lro.Close() 413 414 exitCode := 1 415 var signal int 416 if exitErr, ok := err.(*exec.ExitError); ok { 417 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 418 exitCode = status.ExitStatus() 419 if status.Signaled() { 420 // bash(1) uses the lower 7 bits of a uint8 421 // to indicate normal program failure (see 422 // <sysexits.h>). If a process terminates due 423 // to a signal, encode the signal number to 424 // indicate which signal caused the process 425 // to terminate. Mirror this exit code 426 // encoding scheme. 427 const exitSignalBase = 128 428 signal = int(status.Signal()) 429 exitCode = exitSignalBase + signal 430 } 431 } 432 } else { 433 e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err) 434 } 435 436 e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()} 437 } 438 439 var ( 440 // finishedErr is the error message received when trying to kill and already 441 // exited process. 442 finishedErr = "os: process already finished" 443 ) 444 445 // ClientCleanup is the cleanup routine that a Nomad Client uses to remove the 446 // reminants of a child UniversalExecutor. 447 func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error { 448 return clientCleanup(ic, pid) 449 } 450 451 // Exit cleans up the alloc directory, destroys resource container and kills the 452 // user process 453 func (e *UniversalExecutor) Exit() error { 454 var merr multierror.Error 455 if e.syslogServer != nil { 456 e.syslogServer.Shutdown() 457 } 458 459 if e.lre != nil { 460 e.lre.Close() 461 } 462 463 if e.lro != nil { 464 e.lro.Close() 465 } 466 467 if e.consulSyncer != nil { 468 e.consulSyncer.Shutdown() 469 } 470 471 // If the executor did not launch a process, return. 472 if e.command == nil { 473 return nil 474 } 475 476 // Prefer killing the process via the resource container. 477 if e.cmd.Process != nil && !e.command.ResourceLimits { 478 proc, err := os.FindProcess(e.cmd.Process.Pid) 479 if err != nil { 480 e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v", 481 e.cmd.Process.Pid, err) 482 } else if err := proc.Kill(); err != nil && err.Error() != finishedErr { 483 merr.Errors = append(merr.Errors, 484 fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err)) 485 } 486 } 487 488 if e.command.ResourceLimits { 489 if err := e.resConCtx.executorCleanup(); err != nil { 490 merr.Errors = append(merr.Errors, err) 491 } 492 } 493 return merr.ErrorOrNil() 494 } 495 496 // Shutdown sends an interrupt signal to the user process 497 func (e *UniversalExecutor) ShutDown() error { 498 if e.cmd.Process == nil { 499 return fmt.Errorf("executor.shutdown error: no process found") 500 } 501 proc, err := os.FindProcess(e.cmd.Process.Pid) 502 if err != nil { 503 return fmt.Errorf("executor.shutdown failed to find process: %v", err) 504 } 505 if runtime.GOOS == "windows" { 506 if err := proc.Kill(); err != nil && err.Error() != finishedErr { 507 return err 508 } 509 return nil 510 } 511 if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr { 512 return fmt.Errorf("executor.shutdown error: %v", err) 513 } 514 return nil 515 } 516 517 // SyncServices syncs the services of the task that the executor is running with 518 // Consul 519 func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error { 520 e.logger.Printf("[INFO] executor: registering services") 521 e.consulCtx = ctx 522 if e.consulSyncer == nil { 523 cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger) 524 if err != nil { 525 return err 526 } 527 e.consulSyncer = cs 528 go e.consulSyncer.Run() 529 } 530 e.interpolateServices(e.ctx.Task) 531 e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck) 532 e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor) 533 domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name) 534 serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services) 535 e.consulSyncer.SetServices(domain, serviceMap) 536 return nil 537 } 538 539 // DeregisterServices removes the services of the task that the executor is 540 // running from Consul 541 func (e *UniversalExecutor) DeregisterServices() error { 542 e.logger.Printf("[INFO] executor: de-registering services and shutting down consul service") 543 if e.consulSyncer != nil { 544 return e.consulSyncer.Shutdown() 545 } 546 return nil 547 } 548 549 // pidStats returns the resource usage stats per pid 550 func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) { 551 stats := make(map[string]*cstructs.ResourceUsage) 552 e.pidLock.RLock() 553 pids := make(map[int]*nomadPid, len(e.pids)) 554 for k, v := range e.pids { 555 pids[k] = v 556 } 557 e.pidLock.RUnlock() 558 for pid, np := range pids { 559 p, err := process.NewProcess(int32(pid)) 560 if err != nil { 561 e.logger.Printf("[TRACE] executor: unable to create new process with pid: %v", pid) 562 continue 563 } 564 ms := &cstructs.MemoryStats{} 565 if memInfo, err := p.MemoryInfo(); err == nil { 566 ms.RSS = memInfo.RSS 567 ms.Swap = memInfo.Swap 568 ms.Measured = ExecutorBasicMeasuredMemStats 569 } 570 571 cs := &cstructs.CpuStats{} 572 if cpuStats, err := p.Times(); err == nil { 573 cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second)) 574 cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second)) 575 cs.Measured = ExecutorBasicMeasuredCpuStats 576 577 // calculate cpu usage percent 578 cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second)) 579 } 580 stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs} 581 } 582 583 return stats, nil 584 } 585 586 // lookupBin looks for path to the binary to run by looking for the binary in 587 // the following locations, in-order: task/local/, task/, based on host $PATH. 588 // The return path is absolute. 589 func (e *UniversalExecutor) lookupBin(bin string) (string, error) { 590 // Check in the local directory 591 local := filepath.Join(e.ctx.TaskDir, allocdir.TaskLocal, bin) 592 if _, err := os.Stat(local); err == nil { 593 return local, nil 594 } 595 596 // Check at the root of the task's directory 597 root := filepath.Join(e.ctx.TaskDir, bin) 598 if _, err := os.Stat(root); err == nil { 599 return root, nil 600 } 601 602 // Check the $PATH 603 if host, err := exec.LookPath(bin); err == nil { 604 return host, nil 605 } 606 607 return "", fmt.Errorf("binary %q could not be found", bin) 608 } 609 610 // makeExecutable makes the given file executable for root,group,others. 611 func (e *UniversalExecutor) makeExecutable(binPath string) error { 612 if runtime.GOOS == "windows" { 613 return nil 614 } 615 616 fi, err := os.Stat(binPath) 617 if err != nil { 618 if os.IsNotExist(err) { 619 return fmt.Errorf("binary %q does not exist", binPath) 620 } 621 return fmt.Errorf("specified binary is invalid: %v", err) 622 } 623 624 // If it is not executable, make it so. 625 perm := fi.Mode().Perm() 626 req := os.FileMode(0555) 627 if perm&req != req { 628 if err := os.Chmod(binPath, perm|req); err != nil { 629 return fmt.Errorf("error making %q executable: %s", binPath, err) 630 } 631 } 632 return nil 633 } 634 635 // getFreePort returns a free port ready to be listened on between upper and 636 // lower bounds 637 func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) { 638 if runtime.GOOS == "windows" { 639 return e.listenerTCP(lowerBound, upperBound) 640 } 641 642 return e.listenerUnix() 643 } 644 645 // listenerTCP creates a TCP listener using an unused port between an upper and 646 // lower bound 647 func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) { 648 for i := lowerBound; i <= upperBound; i++ { 649 addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i)) 650 if err != nil { 651 return nil, err 652 } 653 l, err := net.ListenTCP("tcp", addr) 654 if err != nil { 655 continue 656 } 657 return l, nil 658 } 659 return nil, fmt.Errorf("No free port found") 660 } 661 662 // listenerUnix creates a Unix domain socket 663 func (e *UniversalExecutor) listenerUnix() (net.Listener, error) { 664 f, err := ioutil.TempFile("", "plugin") 665 if err != nil { 666 return nil, err 667 } 668 path := f.Name() 669 670 if err := f.Close(); err != nil { 671 return nil, err 672 } 673 if err := os.Remove(path); err != nil { 674 return nil, err 675 } 676 677 return net.Listen("unix", path) 678 } 679 680 // createCheckMap creates a map of checks that the executor will handle on it's 681 // own 682 func (e *UniversalExecutor) createCheckMap() map[string]struct{} { 683 checks := map[string]struct{}{ 684 "script": struct{}{}, 685 } 686 return checks 687 } 688 689 // createCheck creates NomadCheck from a ServiceCheck 690 func (e *UniversalExecutor) createCheck(check *structs.ServiceCheck, checkID string) (consul.Check, error) { 691 if check.Type == structs.ServiceCheckScript && e.ctx.Driver == "docker" { 692 return &DockerScriptCheck{ 693 id: checkID, 694 interval: check.Interval, 695 timeout: check.Timeout, 696 containerID: e.consulCtx.ContainerID, 697 logger: e.logger, 698 cmd: check.Command, 699 args: check.Args, 700 }, nil 701 } 702 703 if check.Type == structs.ServiceCheckScript && (e.ctx.Driver == "exec" || 704 e.ctx.Driver == "raw_exec" || e.ctx.Driver == "java") { 705 return &ExecScriptCheck{ 706 id: checkID, 707 interval: check.Interval, 708 timeout: check.Timeout, 709 cmd: check.Command, 710 args: check.Args, 711 taskDir: e.ctx.TaskDir, 712 FSIsolation: e.command.FSIsolation, 713 }, nil 714 715 } 716 return nil, fmt.Errorf("couldn't create check for %v", check.Name) 717 } 718 719 // interpolateServices interpolates tags in a service and checks with values from the 720 // task's environment. 721 func (e *UniversalExecutor) interpolateServices(task *structs.Task) { 722 e.ctx.TaskEnv.Build() 723 for _, service := range task.Services { 724 for _, check := range service.Checks { 725 check.Name = e.ctx.TaskEnv.ReplaceEnv(check.Name) 726 check.Type = e.ctx.TaskEnv.ReplaceEnv(check.Type) 727 check.Command = e.ctx.TaskEnv.ReplaceEnv(check.Command) 728 check.Args = e.ctx.TaskEnv.ParseAndReplace(check.Args) 729 check.Path = e.ctx.TaskEnv.ReplaceEnv(check.Path) 730 check.Protocol = e.ctx.TaskEnv.ReplaceEnv(check.Protocol) 731 check.PortLabel = e.ctx.TaskEnv.ReplaceEnv(check.PortLabel) 732 check.InitialStatus = e.ctx.TaskEnv.ReplaceEnv(check.InitialStatus) 733 } 734 service.Name = e.ctx.TaskEnv.ReplaceEnv(service.Name) 735 service.PortLabel = e.ctx.TaskEnv.ReplaceEnv(service.PortLabel) 736 service.Tags = e.ctx.TaskEnv.ParseAndReplace(service.Tags) 737 } 738 } 739 740 // collectPids collects the pids of the child processes that the executor is 741 // running every 5 seconds 742 func (e *UniversalExecutor) collectPids() { 743 // Fire the timer right away when the executor starts from there on the pids 744 // are collected every scan interval 745 timer := time.NewTimer(0) 746 defer timer.Stop() 747 for { 748 select { 749 case <-timer.C: 750 pids, err := e.getAllPids() 751 if err != nil { 752 e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err) 753 } 754 e.pidLock.Lock() 755 756 // Adding pids which are not being tracked 757 for pid, np := range pids { 758 if _, ok := e.pids[pid]; !ok { 759 e.pids[pid] = np 760 } 761 } 762 // Removing pids which are no longer present 763 for pid := range e.pids { 764 if _, ok := pids[pid]; !ok { 765 delete(e.pids, pid) 766 } 767 } 768 e.pidLock.Unlock() 769 timer.Reset(pidScanInterval) 770 case <-e.processExited: 771 return 772 } 773 } 774 } 775 776 // scanPids scans all the pids on the machine running the current executor and 777 // returns the child processes of the executor. 778 func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) { 779 processFamily := make(map[int]struct{}) 780 processFamily[parentPid] = struct{}{} 781 782 // A mapping of pids to their parent pids. It is used to build the process 783 // tree of the executing task 784 pidsRemaining := make(map[int]int, len(allPids)) 785 for _, pid := range allPids { 786 pidsRemaining[pid.Pid()] = pid.PPid() 787 } 788 789 for { 790 // flag to indicate if we have found a match 791 foundNewPid := false 792 793 for pid, ppid := range pidsRemaining { 794 _, childPid := processFamily[ppid] 795 796 // checking if the pid is a child of any of the parents 797 if childPid { 798 processFamily[pid] = struct{}{} 799 delete(pidsRemaining, pid) 800 foundNewPid = true 801 } 802 } 803 804 // not scanning anymore if we couldn't find a single match 805 if !foundNewPid { 806 break 807 } 808 } 809 810 res := make(map[int]*nomadPid) 811 for pid := range processFamily { 812 np := nomadPid{ 813 pid: pid, 814 cpuStatsTotal: stats.NewCpuStats(), 815 cpuStatsUser: stats.NewCpuStats(), 816 cpuStatsSys: stats.NewCpuStats(), 817 } 818 res[pid] = &np 819 } 820 return res, nil 821 } 822 823 // aggregatedResourceUsage aggregates the resource usage of all the pids and 824 // returns a TaskResourceUsage data point 825 func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage { 826 ts := time.Now().UTC().UnixNano() 827 var ( 828 systemModeCPU, userModeCPU, percent float64 829 totalRSS, totalSwap uint64 830 ) 831 832 for _, pidStat := range pidStats { 833 systemModeCPU += pidStat.CpuStats.SystemMode 834 userModeCPU += pidStat.CpuStats.UserMode 835 percent += pidStat.CpuStats.Percent 836 837 totalRSS += pidStat.MemoryStats.RSS 838 totalSwap += pidStat.MemoryStats.Swap 839 } 840 841 totalCPU := &cstructs.CpuStats{ 842 SystemMode: systemModeCPU, 843 UserMode: userModeCPU, 844 Percent: percent, 845 Measured: ExecutorBasicMeasuredCpuStats, 846 TotalTicks: e.systemCpuStats.TicksConsumed(percent), 847 } 848 849 totalMemory := &cstructs.MemoryStats{ 850 RSS: totalRSS, 851 Swap: totalSwap, 852 Measured: ExecutorBasicMeasuredMemStats, 853 } 854 855 resourceUsage := cstructs.ResourceUsage{ 856 MemoryStats: totalMemory, 857 CpuStats: totalCPU, 858 } 859 return &cstructs.TaskResourceUsage{ 860 ResourceUsage: &resourceUsage, 861 Timestamp: ts, 862 Pids: pidStats, 863 } 864 } 865 866 // Signal sends the passed signal to the task 867 func (e *UniversalExecutor) Signal(s os.Signal) error { 868 if e.cmd.Process == nil { 869 return fmt.Errorf("Task not yet run") 870 } 871 872 e.logger.Printf("[DEBUG] executor: sending signal %s", s) 873 err := e.cmd.Process.Signal(s) 874 if err != nil { 875 e.logger.Printf("[ERR] executor: sending signal %v failed: %v", s, err) 876 return err 877 } 878 879 return nil 880 }