github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cmd/execer.go

github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cmd/execer.go (about)

     1  package cmd
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"os/exec"
     8  	"sync"
     9  	"syscall"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/stretchr/testify/require"
    14  
    15  	"github.com/tilt-dev/tilt/internal/localexec"
    16  	"github.com/tilt-dev/tilt/pkg/logger"
    17  	"github.com/tilt-dev/tilt/pkg/model"
    18  	"github.com/tilt-dev/tilt/pkg/procutil"
    19  )
    20  
// DefaultGracePeriod is the grace period NewProcessExecer assigns to the
// execers it creates. When a running command is canceled, killProcess waits
// up to this long after requesting a graceful shutdown before killing the
// process group.
var DefaultGracePeriod = 30 * time.Second
    22  
// Execer starts a command and reports its lifecycle over a channel.
type Execer interface {
	// Start returns a channel to pull status updates from. After the process
	// exits (and transmits its final status), the channel is closed.
	Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata
}
    28  
// fakeExecProcess is the bookkeeping record FakeExecer keeps for each command
// it has started.
type fakeExecProcess struct {
	// closeCh is closed by the goroutine FakeExecer.Start spawns, once the
	// fake run has finished.
	closeCh   chan bool
	// exitCh carries the exit code to fake; FakeExecer.stop sends on it and
	// fakeRun receives from it.
	exitCh    chan int
	// workdir is the Dir of the command that was started.
	workdir   string
	// env is the Env of the command that was started.
	env       []string
	// startTime is the time at which Start registered the process.
	startTime time.Time
}
    36  
// FakeExecer is an Execer that simulates processes in memory instead of
// spawning them. A simulated process runs until its context is canceled or
// until stop is called for its command string.
type FakeExecer struct {
	// really dumb/simple process management - key by the command string, and make duplicates an error
	processes map[string]*fakeExecProcess
	mu        sync.Mutex // guards processes
}
    42  
    43  func NewFakeExecer() *FakeExecer {
    44  	return &FakeExecer{
    45  		processes: make(map[string]*fakeExecProcess),
    46  	}
    47  }
    48  
    49  func (e *FakeExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata {
    50  	e.mu.Lock()
    51  	oldProcess, ok := e.processes[cmd.String()]
    52  	e.mu.Unlock()
    53  	if ok {
    54  		select {
    55  		case <-oldProcess.closeCh:
    56  		case <-time.After(5 * time.Second):
    57  			logger.Get(ctx).Infof("internal error: fake execer only supports one instance of each unique command at a time. tried to start a second instance of %q", cmd.Argv)
    58  			return nil
    59  		}
    60  	}
    61  
    62  	exitCh := make(chan int)
    63  	closeCh := make(chan bool)
    64  
    65  	e.mu.Lock()
    66  	e.processes[cmd.String()] = &fakeExecProcess{
    67  		closeCh:   closeCh,
    68  		exitCh:    exitCh,
    69  		workdir:   cmd.Dir,
    70  		startTime: time.Now(),
    71  		env:       cmd.Env,
    72  	}
    73  	e.mu.Unlock()
    74  
    75  	statusCh := make(chan statusAndMetadata)
    76  	go func() {
    77  		fakeRun(ctx, cmd, w, statusCh, exitCh)
    78  
    79  		e.mu.Lock()
    80  		close(closeCh)
    81  		delete(e.processes, cmd.String())
    82  		e.mu.Unlock()
    83  	}()
    84  
    85  	return statusCh
    86  }
    87  
    88  // stops the command with the given command, faking the specified exit code
    89  func (e *FakeExecer) stop(cmd string, exitCode int) error {
    90  	e.mu.Lock()
    91  	p, ok := e.processes[cmd]
    92  	e.mu.Unlock()
    93  	if !ok {
    94  		return fmt.Errorf("no such process %q", cmd)
    95  	}
    96  
    97  	p.exitCh <- exitCode
    98  	e.mu.Lock()
    99  	delete(e.processes, cmd)
   100  	e.mu.Unlock()
   101  	return nil
   102  }
   103  
   104  func fakeRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata, exitCh chan int) {
   105  	defer close(statusCh)
   106  
   107  	_, _ = fmt.Fprintf(w, "Starting cmd %v\n", cmd)
   108  
   109  	statusCh <- statusAndMetadata{status: Running}
   110  
   111  	select {
   112  	case <-ctx.Done():
   113  		_, _ = fmt.Fprintf(w, "cmd %v canceled\n", cmd)
   114  		// this was cleaned up by the controller, so it's not an error
   115  		statusCh <- statusAndMetadata{status: Done, exitCode: 0}
   116  	case exitCode := <-exitCh:
   117  		_, _ = fmt.Fprintf(w, "cmd %v exited with code %d\n", cmd, exitCode)
   118  		// even an exit code of 0 is an error, because services aren't supposed to exit!
   119  		statusCh <- statusAndMetadata{status: Error, exitCode: exitCode}
   120  	}
   121  }
   122  
   123  func (fe *FakeExecer) RequireNoKnownProcess(t *testing.T, cmd string) {
   124  	t.Helper()
   125  	fe.mu.Lock()
   126  	defer fe.mu.Unlock()
   127  
   128  	_, ok := fe.processes[cmd]
   129  
   130  	require.False(t, ok, "%T should not be tracking any process with cmd %q, but it is", FakeExecer{}, cmd)
   131  }
   132  
   133  func ProvideExecer(localEnv *localexec.Env) Execer {
   134  	return NewProcessExecer(localEnv)
   135  }
   136  
// processExecer is the Execer implementation that runs commands as real
// processes.
type processExecer struct {
	// gracePeriod is how long killProcess waits, after requesting a graceful
	// shutdown, before killing the process group.
	gracePeriod time.Duration
	// localEnv builds the exec.Cmd for each command (via ExecCmd).
	localEnv    *localexec.Env
}
   141  
   142  func NewProcessExecer(localEnv *localexec.Env) *processExecer {
   143  	return &processExecer{
   144  		gracePeriod: DefaultGracePeriod,
   145  		localEnv:    localEnv,
   146  	}
   147  }
   148  
   149  func (e *processExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata {
   150  	statusCh := make(chan statusAndMetadata)
   151  
   152  	go func() {
   153  		e.processRun(ctx, cmd, w, statusCh)
   154  	}()
   155  
   156  	return statusCh
   157  }
   158  
   159  func (e *processExecer) processRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata) {
   160  	defer close(statusCh)
   161  
   162  	logger.Get(ctx).Infof("Running cmd: %s", cmd.String())
   163  	c, err := e.localEnv.ExecCmd(cmd, logger.Get(ctx))
   164  	if err != nil {
   165  		logger.Get(ctx).Errorf("%q invalid cmd: %v", cmd.String(), err)
   166  		statusCh <- statusAndMetadata{
   167  			status:   Error,
   168  			exitCode: 1,
   169  			reason:   fmt.Sprintf("invalid cmd: %v", err),
   170  		}
   171  		return
   172  	}
   173  
   174  	c.SysProcAttr = &syscall.SysProcAttr{}
   175  	procutil.SetOptNewProcessGroup(c.SysProcAttr)
   176  	c.Stderr = w
   177  	c.Stdout = w
   178  
   179  	err = c.Start()
   180  	if err != nil {
   181  		logger.Get(ctx).Errorf("%s failed to start: %v", cmd.String(), err)
   182  		statusCh <- statusAndMetadata{
   183  			status:   Error,
   184  			exitCode: 1,
   185  			reason:   fmt.Sprintf("failed to start: %v", err),
   186  		}
   187  		return
   188  	}
   189  
   190  	pid := c.Process.Pid
   191  	statusCh <- statusAndMetadata{status: Running, pid: pid}
   192  
   193  	// This is to prevent this goroutine from blocking, since we know there's only going to be one result
   194  	processExitCh := make(chan error, 1)
   195  	go func() {
   196  		// Cmd Wait() does not have quite the semantics we want,
   197  		// because it will block indefinitely on any descendant processes.
   198  		// This can lead to Cmd appearing to hang.
   199  		//
   200  		// Instead, we exit immediately if the main process exits.
   201  		//
   202  		// Details:
   203  		// https://github.com/tilt-dev/tilt/issues/4456
   204  		state, err := c.Process.Wait()
   205  		procutil.KillProcessGroup(c)
   206  
   207  		if err != nil {
   208  			processExitCh <- err
   209  		} else if !state.Success() {
   210  			processExitCh <- &exec.ExitError{ProcessState: state}
   211  		} else {
   212  			processExitCh <- nil
   213  		}
   214  		close(processExitCh)
   215  	}()
   216  
   217  	select {
   218  	case err := <-processExitCh:
   219  		exitCode := 0
   220  		reason := ""
   221  		status := Done
   222  		if err == nil {
   223  			// Use defaults
   224  		} else if ee, ok := err.(*exec.ExitError); ok {
   225  			status = Error
   226  			exitCode = ee.ExitCode()
   227  			reason = err.Error()
   228  			logger.Get(ctx).Debugf("%s exited with exit code %d", cmd.String(), ee.ExitCode())
   229  		} else {
   230  			status = Error
   231  			exitCode = 1
   232  			reason = err.Error()
   233  			logger.Get(ctx).Errorf("error execing %s: %v", cmd.String(), err)
   234  		}
   235  		statusCh <- statusAndMetadata{status: status, pid: pid, exitCode: exitCode, reason: reason}
   236  	case <-ctx.Done():
   237  		e.killProcess(ctx, c, processExitCh)
   238  		statusCh <- statusAndMetadata{status: Done, pid: pid, reason: "killed", exitCode: 137}
   239  	}
   240  }
   241  
   242  func (e *processExecer) killProcess(ctx context.Context, c *exec.Cmd, processExitCh chan error) {
   243  	logger.Get(ctx).Debugf("About to gracefully shut down process %d", c.Process.Pid)
   244  	err := procutil.GracefullyShutdownProcess(c.Process)
   245  	if err != nil {
   246  		logger.Get(ctx).Debugf("Unable to gracefully kill process %d, sending SIGKILL to the process group: %v", c.Process.Pid, err)
   247  		procutil.KillProcessGroup(c)
   248  		return
   249  	}
   250  
   251  	// we wait 30 seconds to give the process enough time to finish doing any cleanup.
   252  	// this is the same timeout that Kubernetes uses
   253  	// TODO(dmiller): make this configurable via the Tiltfile
   254  	infoCh := time.After(e.gracePeriod / 20)
   255  	moreInfoCh := time.After(e.gracePeriod / 3)
   256  	finalCh := time.After(e.gracePeriod)
   257  
   258  	select {
   259  	case <-infoCh:
   260  		logger.Get(ctx).Infof("Waiting %s for process to exit... (pid: %d)", e.gracePeriod, c.Process.Pid)
   261  	case <-processExitCh:
   262  		return
   263  	}
   264  
   265  	select {
   266  	case <-moreInfoCh:
   267  		logger.Get(ctx).Infof("Still waiting on exit... (pid: %d)", c.Process.Pid)
   268  	case <-processExitCh:
   269  		return
   270  	}
   271  
   272  	select {
   273  	case <-finalCh:
   274  		logger.Get(ctx).Infof("Time is up! Sending %d a kill signal", c.Process.Pid)
   275  		procutil.KillProcessGroup(c)
   276  	case <-processExitCh:
   277  		return
   278  	}
   279  }