github.com/cloudfoundry-attic/garden-linux@v0.333.2-candidate/containerizer/system/process_reaper_linux.go (about)

     1  package system
     2  
     3  import (
     4  	"os"
     5  	"os/exec"
     6  	"os/signal"
     7  	"sync"
     8  	"syscall"
     9  
    10  	"github.com/pivotal-golang/lager"
    11  )
    12  
// ProcessReaper starts child processes and collects their exit statuses by
// reaping them in response to SIGCHLD, handing each status to the caller that
// waits on the corresponding command.
type ProcessReaper struct {
	// mu is intended to guard waiting and monitoredPids.
	mu *sync.Mutex
	// waiting maps the pid of a process started via Start to the channel on
	// which its exit status is delivered once the process has been reaped.
	waiting       map[int]chan int
	monitoredPids map[int]bool // pids which we launched, to avoid confusion with processes launched by children inside the container
	// sigChld receives SIGCHLD notifications registered in StartReaper.
	sigChld chan os.Signal
	// log is the logger used for all reaper diagnostics.
	log lager.Logger

	// wait4 is the wait syscall implementation supplied to StartReaper.
	wait4 Wait4Func
}
    22  
// Wait4Func is the signature of the wait call used by the reaper to collect
// exited children. It has the same shape as syscall.Wait4, which allows the
// real syscall or a substitute to be passed to StartReaper.
type Wait4Func func(pid int, wstatus *syscall.WaitStatus, options int, rusage *syscall.Rusage) (wpid int, err error)
    24  
    25  func StartReaper(logger lager.Logger, waitSyscall Wait4Func) *ProcessReaper {
    26  	logger.Debug("start-reaper")
    27  	p := &ProcessReaper{
    28  		mu:            new(sync.Mutex),
    29  		waiting:       make(map[int]chan int),
    30  		monitoredPids: make(map[int]bool),
    31  		sigChld:       make(chan os.Signal, 1000),
    32  		log:           logger,
    33  
    34  		wait4: waitSyscall,
    35  	}
    36  
    37  	signal.Notify(p.sigChld, syscall.SIGCHLD)
    38  	go p.reapAll()
    39  	return p
    40  }
    41  
    42  func (p *ProcessReaper) Stop() {
    43  	signal.Stop(p.sigChld)
    44  }
    45  
    46  func (p *ProcessReaper) Start(cmd *exec.Cmd) error {
    47  	// Lock before starting the command to ensure p.waiting is set before Wait attempts to read it.
    48  	p.mu.Lock()
    49  	defer p.mu.Unlock()
    50  	if err := cmd.Start(); err != nil {
    51  		p.log.Error("failed to start", err, lager.Data{"cmd": cmd})
    52  		return err
    53  	}
    54  
    55  	p.log.Info("started", lager.Data{"pid": cmd.Process.Pid, "cmd": cmd})
    56  
    57  	p.waiting[cmd.Process.Pid] = make(chan int, 1)
    58  	p.monitoredPids[cmd.Process.Pid] = true
    59  	return nil
    60  }
    61  
    62  func (p *ProcessReaper) Wait(cmd *exec.Cmd) byte {
    63  	ch, ok := p.waitChan(cmd.Process.Pid)
    64  	if !ok {
    65  		panic("waited on a process that was never started")
    66  	}
    67  
    68  	found := ch != nil
    69  	p.log.Info("reaper-receiving-process-exit-status", lager.Data{"pid": cmd.Process.Pid, "found": found})
    70  	exitStatus := byte(<-ch)
    71  	p.log.Debug("reaper-wait-received-process-exit-status", lager.Data{"pid": cmd.Process.Pid, "exitStatus": exitStatus})
    72  	return exitStatus
    73  }
    74  
    75  func (p *ProcessReaper) reapAll() {
    76  	for {
    77  		p.log.Debug("reaper-waiting-for-SIGCHLD")
    78  		<-p.sigChld
    79  		p.reap()
    80  	}
    81  }
    82  
    83  func (p *ProcessReaper) reap() {
    84  	for {
    85  		p.log.Debug("reap")
    86  		var status syscall.WaitStatus
    87  		var rusage syscall.Rusage
    88  		wpid, err := p.wait4(-1, &status, syscall.WNOHANG, &rusage)
    89  
    90  		if wpid == 0 || (wpid == -1 && err.Error() == "no child processes") {
    91  			break
    92  		}
    93  
    94  		if err != nil {
    95  			p.log.Error("reaper-wait-error", err, lager.Data{"wpid": wpid})
    96  			break
    97  		}
    98  
    99  		p.log.Info("reaped", lager.Data{"pid": wpid, "status": status, "rusage": rusage})
   100  
   101  		if ch, ok := p.waitChan(wpid); p.monitoredPids[wpid] && ok {
   102  			ch <- status.ExitStatus()
   103  			p.unmonitorPid(wpid)
   104  
   105  			p.log.Info("wait-once-sent-exit-status", lager.Data{"pid": wpid, "status": status, "rusage": rusage})
   106  		} else {
   107  			p.log.Info("wait-once-not-found", lager.Data{"pid": wpid, "status": status, "rusage": rusage})
   108  		}
   109  	}
   110  }
   111  
   112  func (p *ProcessReaper) waitChan(pid int) (chan int, bool) {
   113  	p.mu.Lock()
   114  	defer p.mu.Unlock()
   115  	wChan, ok := p.waiting[pid]
   116  	return wChan, ok
   117  }
   118  
   119  func (p *ProcessReaper) unmonitorPid(pid int) {
   120  	p.mu.Lock()
   121  	defer p.mu.Unlock()
   122  	delete(p.monitoredPids, pid)
   123  }