github.com/anuvu/nomad@v0.8.7-atom1/client/driver/executor/executor.go (about) 1 package executor 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "log" 9 "net" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "runtime" 14 "strconv" 15 "strings" 16 "sync" 17 "syscall" 18 "time" 19 20 "github.com/armon/circbuf" 21 "github.com/hashicorp/go-multierror" 22 "github.com/mitchellh/go-ps" 23 "github.com/shirou/gopsutil/process" 24 25 "github.com/hashicorp/nomad/client/allocdir" 26 "github.com/hashicorp/nomad/client/driver/env" 27 "github.com/hashicorp/nomad/client/driver/logging" 28 "github.com/hashicorp/nomad/client/stats" 29 shelpers "github.com/hashicorp/nomad/helper/stats" 30 "github.com/hashicorp/nomad/nomad/structs" 31 32 syslog "github.com/RackSec/srslog" 33 34 dstructs "github.com/hashicorp/nomad/client/driver/structs" 35 cstructs "github.com/hashicorp/nomad/client/structs" 36 ) 37 38 const ( 39 // pidScanInterval is the interval at which the executor scans the process 40 // tree for finding out the pids that the executor and it's child processes 41 // have forked 42 pidScanInterval = 5 * time.Second 43 44 // processOutputCloseTolerance is the length of time we will wait for the 45 // launched process to close its stdout/stderr before we force close it. If 46 // data is written after this tolerance, we will not capture it. 47 processOutputCloseTolerance = 2 * time.Second 48 ) 49 50 var ( 51 // The statistics the basic executor exposes 52 ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"} 53 ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"} 54 ) 55 56 // Executor is the interface which allows a driver to launch and supervise 57 // a process 58 type Executor interface { 59 SetContext(ctx *ExecutorContext) error 60 LaunchCmd(command *ExecCommand) (*ProcessState, error) 61 LaunchSyslogServer() (*SyslogServerState, error) 62 Wait() (*ProcessState, error) 63 ShutDown() error 64 Exit() error 65 UpdateLogConfig(logConfig *structs.LogConfig) error 66 UpdateTask(task *structs.Task) error 67 Version() (*ExecutorVersion, error) 68 Stats() (*cstructs.TaskResourceUsage, error) 69 Signal(s os.Signal) error 70 Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error) 71 } 72 73 // ExecutorContext holds context to configure the command user 74 // wants to run and isolate it 75 type ExecutorContext struct { 76 // TaskEnv holds information about the environment of a Task 77 TaskEnv *env.TaskEnv 78 79 // Task is the task whose executor is being launched 80 Task *structs.Task 81 82 // TaskDir is the host path to the task's root 83 TaskDir string 84 85 // LogDir is the host path where logs should be written 86 LogDir string 87 88 // Driver is the name of the driver that invoked the executor 89 Driver string 90 91 // PortUpperBound is the upper bound of the ports that we can use to start 92 // the syslog server 93 PortUpperBound uint 94 95 // PortLowerBound is the lower bound of the ports that we can use to start 96 // the syslog server 97 PortLowerBound uint 98 } 99 100 // ExecCommand holds the user command, args, and other isolation related 101 // settings. 102 type ExecCommand struct { 103 // Cmd is the command that the user wants to run. 104 Cmd string 105 106 // Args is the args of the command that the user wants to run. 107 Args []string 108 109 // TaskKillSignal is an optional field which signal to kill the process 110 TaskKillSignal os.Signal 111 112 // FSIsolation determines whether the command would be run in a chroot. 113 FSIsolation bool 114 115 // User is the user which the executor uses to run the command. 116 User string 117 118 // ResourceLimits determines whether resource limits are enforced by the 119 // executor. 120 ResourceLimits bool 121 122 // Cgroup marks whether we put the process in a cgroup. Setting this field 123 // doesn't enforce resource limits. To enforce limits, set ResourceLimits. 124 // Using the cgroup does allow more precise cleanup of processes. 125 BasicProcessCgroup bool 126 } 127 128 // ProcessState holds information about the state of a user process. 129 type ProcessState struct { 130 Pid int 131 ExitCode int 132 Signal int 133 IsolationConfig *dstructs.IsolationConfig 134 Time time.Time 135 } 136 137 // nomadPid holds a pid and it's cpu percentage calculator 138 type nomadPid struct { 139 pid int 140 cpuStatsTotal *stats.CpuStats 141 cpuStatsUser *stats.CpuStats 142 cpuStatsSys *stats.CpuStats 143 } 144 145 // SyslogServerState holds the address and isolation information of a launched 146 // syslog server 147 type SyslogServerState struct { 148 IsolationConfig *dstructs.IsolationConfig 149 Addr string 150 } 151 152 // ExecutorVersion is the version of the executor 153 type ExecutorVersion struct { 154 Version string 155 } 156 157 func (v *ExecutorVersion) GoString() string { 158 return v.Version 159 } 160 161 // UniversalExecutor is an implementation of the Executor which launches and 162 // supervises processes. In addition to process supervision it provides resource 163 // and file system isolation 164 type UniversalExecutor struct { 165 cmd exec.Cmd 166 ctx *ExecutorContext 167 command *ExecCommand 168 169 pids map[int]*nomadPid 170 pidLock sync.RWMutex 171 exitState *ProcessState 172 processExited chan interface{} 173 fsIsolationEnforced bool 174 175 lre *logRotatorWrapper 176 lro *logRotatorWrapper 177 rotatorLock sync.Mutex 178 179 syslogServer *logging.SyslogServer 180 syslogChan chan *logging.SyslogMessage 181 182 resConCtx resourceContainerContext 183 184 totalCpuStats *stats.CpuStats 185 userCpuStats *stats.CpuStats 186 systemCpuStats *stats.CpuStats 187 logger *log.Logger 188 } 189 190 // NewExecutor returns an Executor 191 func NewExecutor(logger *log.Logger) Executor { 192 if err := shelpers.Init(); err != nil { 193 logger.Printf("[ERR] executor: unable to initialize stats: %v", err) 194 } 195 196 exec := &UniversalExecutor{ 197 logger: logger, 198 processExited: make(chan interface{}), 199 totalCpuStats: stats.NewCpuStats(), 200 userCpuStats: stats.NewCpuStats(), 201 systemCpuStats: stats.NewCpuStats(), 202 pids: make(map[int]*nomadPid), 203 } 204 205 return exec 206 } 207 208 // Version returns the api version of the executor 209 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 210 return &ExecutorVersion{Version: "1.1.0"}, nil 211 } 212 213 // SetContext is used to set the executors context and should be the first call 214 // after launching the executor. 215 func (e *UniversalExecutor) SetContext(ctx *ExecutorContext) error { 216 e.ctx = ctx 217 return nil 218 } 219 220 // LaunchCmd launches the main process and returns its state. It also 221 // configures an applies isolation on certain platforms. 222 func (e *UniversalExecutor) LaunchCmd(command *ExecCommand) (*ProcessState, error) { 223 e.logger.Printf("[INFO] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " ")) 224 225 // Ensure the context has been set first 226 if e.ctx == nil { 227 return nil, fmt.Errorf("SetContext must be called before launching a command") 228 } 229 230 e.command = command 231 232 // setting the user of the process 233 if command.User != "" { 234 e.logger.Printf("[DEBUG] executor: running command as %s", command.User) 235 if err := e.runAs(command.User); err != nil { 236 return nil, err 237 } 238 } 239 240 // set the task dir as the working directory for the command 241 e.cmd.Dir = e.ctx.TaskDir 242 243 // start command in separate process group 244 if err := e.setNewProcessGroup(); err != nil { 245 return nil, err 246 } 247 248 // configuring the chroot, resource container, and start the plugin 249 // process in the chroot. 250 if err := e.configureIsolation(); err != nil { 251 return nil, err 252 } 253 // Apply ourselves into the resource container. The executor MUST be in 254 // the resource container before the user task is started, otherwise we 255 // are subject to a fork attack in which a process escapes isolation by 256 // immediately forking. 257 if err := e.applyLimits(os.Getpid()); err != nil { 258 return nil, err 259 } 260 261 // Setup the loggers 262 if err := e.configureLoggers(); err != nil { 263 return nil, err 264 } 265 e.cmd.Stdout = e.lro.processOutWriter 266 e.cmd.Stderr = e.lre.processOutWriter 267 268 // Look up the binary path and make it executable 269 absPath, err := e.lookupBin(e.ctx.TaskEnv.ReplaceEnv(command.Cmd)) 270 if err != nil { 271 return nil, err 272 } 273 274 if err := e.makeExecutable(absPath); err != nil { 275 return nil, err 276 } 277 278 path := absPath 279 280 // Determine the path to run as it may have to be relative to the chroot. 281 if e.fsIsolationEnforced { 282 rel, err := filepath.Rel(e.ctx.TaskDir, path) 283 if err != nil { 284 return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", e.ctx.TaskDir, path, err) 285 } 286 path = rel 287 } 288 289 // Set the commands arguments 290 e.cmd.Path = path 291 e.cmd.Args = append([]string{e.cmd.Path}, e.ctx.TaskEnv.ParseAndReplace(command.Args)...) 292 e.cmd.Env = e.ctx.TaskEnv.List() 293 294 // Start the process 295 if err := e.cmd.Start(); err != nil { 296 return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.cmd.Args, err) 297 } 298 299 // Close the files. This is copied from the os/exec package. 300 e.lro.processOutWriter.Close() 301 e.lre.processOutWriter.Close() 302 303 go e.collectPids() 304 go e.wait() 305 ic := e.resConCtx.getIsolationConfig() 306 return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil 307 } 308 309 // Exec a command inside a container for exec and java drivers. 310 func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) { 311 ctx, cancel := context.WithDeadline(context.Background(), deadline) 312 defer cancel() 313 return ExecScript(ctx, e.cmd.Dir, e.ctx.TaskEnv, e.cmd.SysProcAttr, name, args) 314 } 315 316 // ExecScript executes cmd with args and returns the output, exit code, and 317 // error. Output is truncated to client/driver/structs.CheckBufSize 318 func ExecScript(ctx context.Context, dir string, env *env.TaskEnv, attrs *syscall.SysProcAttr, 319 name string, args []string) ([]byte, int, error) { 320 name = env.ReplaceEnv(name) 321 cmd := exec.CommandContext(ctx, name, env.ParseAndReplace(args)...) 322 323 // Copy runtime environment from the main command 324 cmd.SysProcAttr = attrs 325 cmd.Dir = dir 326 cmd.Env = env.List() 327 328 // Capture output 329 buf, _ := circbuf.NewBuffer(int64(dstructs.CheckBufSize)) 330 cmd.Stdout = buf 331 cmd.Stderr = buf 332 333 if err := cmd.Run(); err != nil { 334 exitErr, ok := err.(*exec.ExitError) 335 if !ok { 336 // Non-exit error, return it and let the caller treat 337 // it as a critical failure 338 return nil, 0, err 339 } 340 341 // Some kind of error happened; default to critical 342 exitCode := 2 343 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 344 exitCode = status.ExitStatus() 345 } 346 347 // Don't return the exitError as the caller only needs the 348 // output and code. 349 return buf.Bytes(), exitCode, nil 350 } 351 return buf.Bytes(), 0, nil 352 } 353 354 // configureLoggers sets up the standard out/error file rotators 355 func (e *UniversalExecutor) configureLoggers() error { 356 e.rotatorLock.Lock() 357 defer e.rotatorLock.Unlock() 358 359 logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024) 360 if e.lro == nil { 361 lro, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stdout", e.ctx.Task.Name), 362 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 363 if err != nil { 364 return fmt.Errorf("error creating new stdout log file for %q: %v", e.ctx.Task.Name, err) 365 } 366 367 r, err := newLogRotatorWrapper(e.logger, lro) 368 if err != nil { 369 return err 370 } 371 e.lro = r 372 } 373 374 if e.lre == nil { 375 lre, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stderr", e.ctx.Task.Name), 376 e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger) 377 if err != nil { 378 return fmt.Errorf("error creating new stderr log file for %q: %v", e.ctx.Task.Name, err) 379 } 380 381 r, err := newLogRotatorWrapper(e.logger, lre) 382 if err != nil { 383 return err 384 } 385 e.lre = r 386 } 387 return nil 388 } 389 390 // Wait waits until a process has exited and returns it's exitcode and errors 391 func (e *UniversalExecutor) Wait() (*ProcessState, error) { 392 <-e.processExited 393 return e.exitState, nil 394 } 395 396 // COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist. 397 // UpdateLogConfig updates the log configuration 398 func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error { 399 e.ctx.Task.LogConfig = logConfig 400 if e.lro == nil { 401 return fmt.Errorf("log rotator for stdout doesn't exist") 402 } 403 e.lro.rotatorWriter.MaxFiles = logConfig.MaxFiles 404 e.lro.rotatorWriter.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 405 406 if e.lre == nil { 407 return fmt.Errorf("log rotator for stderr doesn't exist") 408 } 409 e.lre.rotatorWriter.MaxFiles = logConfig.MaxFiles 410 e.lre.rotatorWriter.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024) 411 return nil 412 } 413 414 func (e *UniversalExecutor) UpdateTask(task *structs.Task) error { 415 e.ctx.Task = task 416 417 // Updating Log Config 418 e.rotatorLock.Lock() 419 if e.lro != nil && e.lre != nil { 420 fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024) 421 e.lro.rotatorWriter.MaxFiles = task.LogConfig.MaxFiles 422 e.lro.rotatorWriter.FileSize = fileSize 423 e.lre.rotatorWriter.MaxFiles = task.LogConfig.MaxFiles 424 e.lre.rotatorWriter.FileSize = fileSize 425 } 426 e.rotatorLock.Unlock() 427 return nil 428 } 429 430 func (e *UniversalExecutor) wait() { 431 defer close(e.processExited) 432 err := e.cmd.Wait() 433 ic := e.resConCtx.getIsolationConfig() 434 if err == nil { 435 e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()} 436 return 437 } 438 439 e.lre.Close() 440 e.lro.Close() 441 442 exitCode := 1 443 var signal int 444 if exitErr, ok := err.(*exec.ExitError); ok { 445 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 446 exitCode = status.ExitStatus() 447 if status.Signaled() { 448 // bash(1) uses the lower 7 bits of a uint8 449 // to indicate normal program failure (see 450 // <sysexits.h>). If a process terminates due 451 // to a signal, encode the signal number to 452 // indicate which signal caused the process 453 // to terminate. Mirror this exit code 454 // encoding scheme. 455 const exitSignalBase = 128 456 signal = int(status.Signal()) 457 exitCode = exitSignalBase + signal 458 } 459 } 460 } else { 461 e.logger.Printf("[WARN] executor: unexpected Cmd.Wait() error type: %v", err) 462 } 463 464 e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()} 465 } 466 467 var ( 468 // finishedErr is the error message received when trying to kill and already 469 // exited process. 470 finishedErr = "os: process already finished" 471 472 // noSuchProcessErr is the error message received when trying to kill a non 473 // existing process (e.g. when killing a process group). 474 noSuchProcessErr = "no such process" 475 ) 476 477 // ClientCleanup is the cleanup routine that a Nomad Client uses to remove the 478 // remnants of a child UniversalExecutor. 479 func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error { 480 return clientCleanup(ic, pid) 481 } 482 483 // Exit cleans up the alloc directory, destroys resource container and kills the 484 // user process 485 func (e *UniversalExecutor) Exit() error { 486 var merr multierror.Error 487 if e.syslogServer != nil { 488 e.syslogServer.Shutdown() 489 } 490 491 if e.lre != nil { 492 e.lre.Close() 493 } 494 495 if e.lro != nil { 496 e.lro.Close() 497 } 498 499 // If the executor did not launch a process, return. 500 if e.command == nil { 501 return nil 502 } 503 504 // Prefer killing the process via the resource container. 505 if e.cmd.Process != nil && !(e.command.ResourceLimits || e.command.BasicProcessCgroup) { 506 proc, err := os.FindProcess(e.cmd.Process.Pid) 507 if err != nil { 508 e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v", 509 e.cmd.Process.Pid, err) 510 } else if err := e.cleanupChildProcesses(proc); err != nil && err.Error() != finishedErr { 511 merr.Errors = append(merr.Errors, 512 fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err)) 513 } 514 } 515 516 if e.command.ResourceLimits || e.command.BasicProcessCgroup { 517 if err := e.resConCtx.executorCleanup(); err != nil { 518 merr.Errors = append(merr.Errors, err) 519 } 520 } 521 return merr.ErrorOrNil() 522 } 523 524 // Shutdown sends an interrupt signal to the user process 525 func (e *UniversalExecutor) ShutDown() error { 526 if e.cmd.Process == nil { 527 return fmt.Errorf("executor.shutdown error: no process found") 528 } 529 proc, err := os.FindProcess(e.cmd.Process.Pid) 530 if err != nil { 531 return fmt.Errorf("executor.shutdown failed to find process: %v", err) 532 } 533 return e.shutdownProcess(proc) 534 } 535 536 // pidStats returns the resource usage stats per pid 537 func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) { 538 stats := make(map[string]*cstructs.ResourceUsage) 539 e.pidLock.RLock() 540 pids := make(map[int]*nomadPid, len(e.pids)) 541 for k, v := range e.pids { 542 pids[k] = v 543 } 544 e.pidLock.RUnlock() 545 for pid, np := range pids { 546 p, err := process.NewProcess(int32(pid)) 547 if err != nil { 548 e.logger.Printf("[TRACE] executor: unable to create new process with pid: %v", pid) 549 continue 550 } 551 ms := &cstructs.MemoryStats{} 552 if memInfo, err := p.MemoryInfo(); err == nil { 553 ms.RSS = memInfo.RSS 554 ms.Swap = memInfo.Swap 555 ms.Measured = ExecutorBasicMeasuredMemStats 556 } 557 558 cs := &cstructs.CpuStats{} 559 if cpuStats, err := p.Times(); err == nil { 560 cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second)) 561 cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second)) 562 cs.Measured = ExecutorBasicMeasuredCpuStats 563 564 // calculate cpu usage percent 565 cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second)) 566 } 567 stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs} 568 } 569 570 return stats, nil 571 } 572 573 // lookupBin looks for path to the binary to run by looking for the binary in 574 // the following locations, in-order: task/local/, task/, based on host $PATH. 575 // The return path is absolute. 576 func (e *UniversalExecutor) lookupBin(bin string) (string, error) { 577 // Check in the local directory 578 local := filepath.Join(e.ctx.TaskDir, allocdir.TaskLocal, bin) 579 if _, err := os.Stat(local); err == nil { 580 return local, nil 581 } 582 583 // Check at the root of the task's directory 584 root := filepath.Join(e.ctx.TaskDir, bin) 585 if _, err := os.Stat(root); err == nil { 586 return root, nil 587 } 588 589 // Check the $PATH 590 if host, err := exec.LookPath(bin); err == nil { 591 return host, nil 592 } 593 594 return "", fmt.Errorf("binary %q could not be found", bin) 595 } 596 597 // makeExecutable makes the given file executable for root,group,others. 598 func (e *UniversalExecutor) makeExecutable(binPath string) error { 599 if runtime.GOOS == "windows" { 600 return nil 601 } 602 603 fi, err := os.Stat(binPath) 604 if err != nil { 605 if os.IsNotExist(err) { 606 return fmt.Errorf("binary %q does not exist", binPath) 607 } 608 return fmt.Errorf("specified binary is invalid: %v", err) 609 } 610 611 // If it is not executable, make it so. 612 perm := fi.Mode().Perm() 613 req := os.FileMode(0555) 614 if perm&req != req { 615 if err := os.Chmod(binPath, perm|req); err != nil { 616 return fmt.Errorf("error making %q executable: %s", binPath, err) 617 } 618 } 619 return nil 620 } 621 622 // getFreePort returns a free port ready to be listened on between upper and 623 // lower bounds 624 func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) { 625 if runtime.GOOS == "windows" { 626 return e.listenerTCP(lowerBound, upperBound) 627 } 628 629 return e.listenerUnix() 630 } 631 632 // listenerTCP creates a TCP listener using an unused port between an upper and 633 // lower bound 634 func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) { 635 for i := lowerBound; i <= upperBound; i++ { 636 addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i)) 637 if err != nil { 638 return nil, err 639 } 640 l, err := net.ListenTCP("tcp", addr) 641 if err != nil { 642 continue 643 } 644 return l, nil 645 } 646 return nil, fmt.Errorf("No free port found") 647 } 648 649 // listenerUnix creates a Unix domain socket 650 func (e *UniversalExecutor) listenerUnix() (net.Listener, error) { 651 f, err := ioutil.TempFile("", "plugin") 652 if err != nil { 653 return nil, err 654 } 655 path := f.Name() 656 657 if err := f.Close(); err != nil { 658 return nil, err 659 } 660 if err := os.Remove(path); err != nil { 661 return nil, err 662 } 663 664 return net.Listen("unix", path) 665 } 666 667 // collectPids collects the pids of the child processes that the executor is 668 // running every 5 seconds 669 func (e *UniversalExecutor) collectPids() { 670 // Fire the timer right away when the executor starts from there on the pids 671 // are collected every scan interval 672 timer := time.NewTimer(0) 673 defer timer.Stop() 674 for { 675 select { 676 case <-timer.C: 677 pids, err := e.getAllPids() 678 if err != nil { 679 e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err) 680 } 681 e.pidLock.Lock() 682 683 // Adding pids which are not being tracked 684 for pid, np := range pids { 685 if _, ok := e.pids[pid]; !ok { 686 e.pids[pid] = np 687 } 688 } 689 // Removing pids which are no longer present 690 for pid := range e.pids { 691 if _, ok := pids[pid]; !ok { 692 delete(e.pids, pid) 693 } 694 } 695 e.pidLock.Unlock() 696 timer.Reset(pidScanInterval) 697 case <-e.processExited: 698 return 699 } 700 } 701 } 702 703 // scanPids scans all the pids on the machine running the current executor and 704 // returns the child processes of the executor. 705 func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) { 706 processFamily := make(map[int]struct{}) 707 processFamily[parentPid] = struct{}{} 708 709 // A mapping of pids to their parent pids. It is used to build the process 710 // tree of the executing task 711 pidsRemaining := make(map[int]int, len(allPids)) 712 for _, pid := range allPids { 713 pidsRemaining[pid.Pid()] = pid.PPid() 714 } 715 716 for { 717 // flag to indicate if we have found a match 718 foundNewPid := false 719 720 for pid, ppid := range pidsRemaining { 721 _, childPid := processFamily[ppid] 722 723 // checking if the pid is a child of any of the parents 724 if childPid { 725 processFamily[pid] = struct{}{} 726 delete(pidsRemaining, pid) 727 foundNewPid = true 728 } 729 } 730 731 // not scanning anymore if we couldn't find a single match 732 if !foundNewPid { 733 break 734 } 735 } 736 737 res := make(map[int]*nomadPid) 738 for pid := range processFamily { 739 np := nomadPid{ 740 pid: pid, 741 cpuStatsTotal: stats.NewCpuStats(), 742 cpuStatsUser: stats.NewCpuStats(), 743 cpuStatsSys: stats.NewCpuStats(), 744 } 745 res[pid] = &np 746 } 747 return res, nil 748 } 749 750 // aggregatedResourceUsage aggregates the resource usage of all the pids and 751 // returns a TaskResourceUsage data point 752 func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage { 753 ts := time.Now().UTC().UnixNano() 754 var ( 755 systemModeCPU, userModeCPU, percent float64 756 totalRSS, totalSwap uint64 757 ) 758 759 for _, pidStat := range pidStats { 760 systemModeCPU += pidStat.CpuStats.SystemMode 761 userModeCPU += pidStat.CpuStats.UserMode 762 percent += pidStat.CpuStats.Percent 763 764 totalRSS += pidStat.MemoryStats.RSS 765 totalSwap += pidStat.MemoryStats.Swap 766 } 767 768 totalCPU := &cstructs.CpuStats{ 769 SystemMode: systemModeCPU, 770 UserMode: userModeCPU, 771 Percent: percent, 772 Measured: ExecutorBasicMeasuredCpuStats, 773 TotalTicks: e.systemCpuStats.TicksConsumed(percent), 774 } 775 776 totalMemory := &cstructs.MemoryStats{ 777 RSS: totalRSS, 778 Swap: totalSwap, 779 Measured: ExecutorBasicMeasuredMemStats, 780 } 781 782 resourceUsage := cstructs.ResourceUsage{ 783 MemoryStats: totalMemory, 784 CpuStats: totalCPU, 785 } 786 return &cstructs.TaskResourceUsage{ 787 ResourceUsage: &resourceUsage, 788 Timestamp: ts, 789 Pids: pidStats, 790 } 791 } 792 793 // Signal sends the passed signal to the task 794 func (e *UniversalExecutor) Signal(s os.Signal) error { 795 if e.cmd.Process == nil { 796 return fmt.Errorf("Task not yet run") 797 } 798 799 e.logger.Printf("[DEBUG] executor: sending signal %s to PID %d", s, e.cmd.Process.Pid) 800 err := e.cmd.Process.Signal(s) 801 if err != nil { 802 e.logger.Printf("[ERR] executor: sending signal %v failed: %v", s, err) 803 return err 804 } 805 806 return nil 807 } 808 809 func (e *UniversalExecutor) LaunchSyslogServer() (*SyslogServerState, error) { 810 // Ensure the context has been set first 811 if e.ctx == nil { 812 return nil, fmt.Errorf("SetContext must be called before launching the Syslog Server") 813 } 814 815 e.syslogChan = make(chan *logging.SyslogMessage, 2048) 816 l, err := e.getListener(e.ctx.PortLowerBound, e.ctx.PortUpperBound) 817 if err != nil { 818 return nil, err 819 } 820 e.logger.Printf("[DEBUG] syslog-server: launching syslog server on addr: %v", l.Addr().String()) 821 if err := e.configureLoggers(); err != nil { 822 return nil, err 823 } 824 825 e.syslogServer = logging.NewSyslogServer(l, e.syslogChan, e.logger) 826 go e.syslogServer.Start() 827 go e.collectLogs(e.lre.rotatorWriter, e.lro.rotatorWriter) 828 syslogAddr := fmt.Sprintf("%s://%s", l.Addr().Network(), l.Addr().String()) 829 return &SyslogServerState{Addr: syslogAddr}, nil 830 } 831 832 func (e *UniversalExecutor) collectLogs(we io.Writer, wo io.Writer) { 833 for logParts := range e.syslogChan { 834 // If the severity of the log line is err then we write to stderr 835 // otherwise all messages go to stdout 836 if logParts.Severity == syslog.LOG_ERR { 837 we.Write(logParts.Message) 838 we.Write([]byte{'\n'}) 839 } else { 840 wo.Write(logParts.Message) 841 wo.Write([]byte{'\n'}) 842 } 843 } 844 } 845 846 // logRotatorWrapper wraps our log rotator and exposes a pipe that can feed the 847 // log rotator data. The processOutWriter should be attached to the process and 848 // data will be copied from the reader to the rotator. 849 type logRotatorWrapper struct { 850 processOutWriter *os.File 851 processOutReader *os.File 852 rotatorWriter *logging.FileRotator 853 hasFinishedCopied chan struct{} 854 logger *log.Logger 855 } 856 857 // newLogRotatorWrapper takes a rotator and returns a wrapper that has the 858 // processOutWriter to attach to the processes stdout or stderr. 859 func newLogRotatorWrapper(logger *log.Logger, rotator *logging.FileRotator) (*logRotatorWrapper, error) { 860 r, w, err := os.Pipe() 861 if err != nil { 862 return nil, fmt.Errorf("failed to create os.Pipe for extracting logs: %v", err) 863 } 864 865 wrap := &logRotatorWrapper{ 866 processOutWriter: w, 867 processOutReader: r, 868 rotatorWriter: rotator, 869 hasFinishedCopied: make(chan struct{}), 870 logger: logger, 871 } 872 wrap.start() 873 return wrap, nil 874 } 875 876 // start starts a go-routine that copies from the pipe into the rotator. This is 877 // called by the constructor and not the user of the wrapper. 878 func (l *logRotatorWrapper) start() { 879 go func() { 880 defer close(l.hasFinishedCopied) 881 _, err := io.Copy(l.rotatorWriter, l.processOutReader) 882 if err != nil { 883 // Close reader to propagate io error across pipe. 884 // Note that this may block until the process exits on 885 // Windows due to 886 // https://github.com/PowerShell/PowerShell/issues/4254 887 // or similar issues. Since this is already running in 888 // a goroutine its safe to block until the process is 889 // force-killed. 890 l.processOutReader.Close() 891 } 892 }() 893 return 894 } 895 896 // Close closes the rotator and the process writer to ensure that the Wait 897 // command exits. 898 func (l *logRotatorWrapper) Close() { 899 // Wait up to the close tolerance before we force close 900 select { 901 case <-l.hasFinishedCopied: 902 case <-time.After(processOutputCloseTolerance): 903 } 904 905 // Closing the read side of a pipe may block on Windows if the process 906 // is being debugged as in: 907 // https://github.com/PowerShell/PowerShell/issues/4254 908 // The pipe will be closed and cleaned up when the process exits. 909 closeDone := make(chan struct{}) 910 go func() { 911 defer close(closeDone) 912 err := l.processOutReader.Close() 913 if err != nil && !strings.Contains(err.Error(), "file already closed") { 914 l.logger.Printf("[WARN] executor: error closing read-side of process output pipe: %v", err) 915 } 916 917 }() 918 919 select { 920 case <-closeDone: 921 case <-time.After(processOutputCloseTolerance): 922 l.logger.Printf("[WARN] executor: timed out waiting for read-side of process output pipe to close") 923 } 924 925 l.rotatorWriter.Close() 926 return 927 }