github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cmd/execer.go (about) 1 package cmd 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "os/exec" 8 "sync" 9 "syscall" 10 "testing" 11 "time" 12 13 "github.com/stretchr/testify/require" 14 15 "github.com/tilt-dev/tilt/internal/localexec" 16 "github.com/tilt-dev/tilt/pkg/logger" 17 "github.com/tilt-dev/tilt/pkg/model" 18 "github.com/tilt-dev/tilt/pkg/procutil" 19 ) 20 21 var DefaultGracePeriod = 30 * time.Second 22 23 type Execer interface { 24 // Returns a channel to pull status updates from. After the process exists 25 // (and transmits its final status), the channel is closed. 26 Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata 27 } 28 29 type fakeExecProcess struct { 30 closeCh chan bool 31 exitCh chan int 32 workdir string 33 env []string 34 startTime time.Time 35 } 36 37 type FakeExecer struct { 38 // really dumb/simple process management - key by the command string, and make duplicates an error 39 processes map[string]*fakeExecProcess 40 mu sync.Mutex 41 } 42 43 func NewFakeExecer() *FakeExecer { 44 return &FakeExecer{ 45 processes: make(map[string]*fakeExecProcess), 46 } 47 } 48 49 func (e *FakeExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata { 50 e.mu.Lock() 51 oldProcess, ok := e.processes[cmd.String()] 52 e.mu.Unlock() 53 if ok { 54 select { 55 case <-oldProcess.closeCh: 56 case <-time.After(5 * time.Second): 57 logger.Get(ctx).Infof("internal error: fake execer only supports one instance of each unique command at a time. tried to start a second instance of %q", cmd.Argv) 58 return nil 59 } 60 } 61 62 exitCh := make(chan int) 63 closeCh := make(chan bool) 64 65 e.mu.Lock() 66 e.processes[cmd.String()] = &fakeExecProcess{ 67 closeCh: closeCh, 68 exitCh: exitCh, 69 workdir: cmd.Dir, 70 startTime: time.Now(), 71 env: cmd.Env, 72 } 73 e.mu.Unlock() 74 75 statusCh := make(chan statusAndMetadata) 76 go func() { 77 fakeRun(ctx, cmd, w, statusCh, exitCh) 78 79 e.mu.Lock() 80 close(closeCh) 81 delete(e.processes, cmd.String()) 82 e.mu.Unlock() 83 }() 84 85 return statusCh 86 } 87 88 // stops the command with the given command, faking the specified exit code 89 func (e *FakeExecer) stop(cmd string, exitCode int) error { 90 e.mu.Lock() 91 p, ok := e.processes[cmd] 92 e.mu.Unlock() 93 if !ok { 94 return fmt.Errorf("no such process %q", cmd) 95 } 96 97 p.exitCh <- exitCode 98 e.mu.Lock() 99 delete(e.processes, cmd) 100 e.mu.Unlock() 101 return nil 102 } 103 104 func fakeRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata, exitCh chan int) { 105 defer close(statusCh) 106 107 _, _ = fmt.Fprintf(w, "Starting cmd %v\n", cmd) 108 109 statusCh <- statusAndMetadata{status: Running} 110 111 select { 112 case <-ctx.Done(): 113 _, _ = fmt.Fprintf(w, "cmd %v canceled\n", cmd) 114 // this was cleaned up by the controller, so it's not an error 115 statusCh <- statusAndMetadata{status: Done, exitCode: 0} 116 case exitCode := <-exitCh: 117 _, _ = fmt.Fprintf(w, "cmd %v exited with code %d\n", cmd, exitCode) 118 // even an exit code of 0 is an error, because services aren't supposed to exit! 119 statusCh <- statusAndMetadata{status: Error, exitCode: exitCode} 120 } 121 } 122 123 func (fe *FakeExecer) RequireNoKnownProcess(t *testing.T, cmd string) { 124 t.Helper() 125 fe.mu.Lock() 126 defer fe.mu.Unlock() 127 128 _, ok := fe.processes[cmd] 129 130 require.False(t, ok, "%T should not be tracking any process with cmd %q, but it is", FakeExecer{}, cmd) 131 } 132 133 func ProvideExecer(localEnv *localexec.Env) Execer { 134 return NewProcessExecer(localEnv) 135 } 136 137 type processExecer struct { 138 gracePeriod time.Duration 139 localEnv *localexec.Env 140 } 141 142 func NewProcessExecer(localEnv *localexec.Env) *processExecer { 143 return &processExecer{ 144 gracePeriod: DefaultGracePeriod, 145 localEnv: localEnv, 146 } 147 } 148 149 func (e *processExecer) Start(ctx context.Context, cmd model.Cmd, w io.Writer) chan statusAndMetadata { 150 statusCh := make(chan statusAndMetadata) 151 152 go func() { 153 e.processRun(ctx, cmd, w, statusCh) 154 }() 155 156 return statusCh 157 } 158 159 func (e *processExecer) processRun(ctx context.Context, cmd model.Cmd, w io.Writer, statusCh chan statusAndMetadata) { 160 defer close(statusCh) 161 162 logger.Get(ctx).Infof("Running cmd: %s", cmd.String()) 163 c, err := e.localEnv.ExecCmd(cmd, logger.Get(ctx)) 164 if err != nil { 165 logger.Get(ctx).Errorf("%q invalid cmd: %v", cmd.String(), err) 166 statusCh <- statusAndMetadata{ 167 status: Error, 168 exitCode: 1, 169 reason: fmt.Sprintf("invalid cmd: %v", err), 170 } 171 return 172 } 173 174 c.SysProcAttr = &syscall.SysProcAttr{} 175 procutil.SetOptNewProcessGroup(c.SysProcAttr) 176 c.Stderr = w 177 c.Stdout = w 178 179 err = c.Start() 180 if err != nil { 181 logger.Get(ctx).Errorf("%s failed to start: %v", cmd.String(), err) 182 statusCh <- statusAndMetadata{ 183 status: Error, 184 exitCode: 1, 185 reason: fmt.Sprintf("failed to start: %v", err), 186 } 187 return 188 } 189 190 pid := c.Process.Pid 191 statusCh <- statusAndMetadata{status: Running, pid: pid} 192 193 // This is to prevent this goroutine from blocking, since we know there's only going to be one result 194 processExitCh := make(chan error, 1) 195 go func() { 196 // Cmd Wait() does not have quite the semantics we want, 197 // because it will block indefinitely on any descendant processes. 198 // This can lead to Cmd appearing to hang. 199 // 200 // Instead, we exit immediately if the main process exits. 201 // 202 // Details: 203 // https://github.com/tilt-dev/tilt/issues/4456 204 state, err := c.Process.Wait() 205 procutil.KillProcessGroup(c) 206 207 if err != nil { 208 processExitCh <- err 209 } else if !state.Success() { 210 processExitCh <- &exec.ExitError{ProcessState: state} 211 } else { 212 processExitCh <- nil 213 } 214 close(processExitCh) 215 }() 216 217 select { 218 case err := <-processExitCh: 219 exitCode := 0 220 reason := "" 221 status := Done 222 if err == nil { 223 // Use defaults 224 } else if ee, ok := err.(*exec.ExitError); ok { 225 status = Error 226 exitCode = ee.ExitCode() 227 reason = err.Error() 228 logger.Get(ctx).Debugf("%s exited with exit code %d", cmd.String(), ee.ExitCode()) 229 } else { 230 status = Error 231 exitCode = 1 232 reason = err.Error() 233 logger.Get(ctx).Errorf("error execing %s: %v", cmd.String(), err) 234 } 235 statusCh <- statusAndMetadata{status: status, pid: pid, exitCode: exitCode, reason: reason} 236 case <-ctx.Done(): 237 e.killProcess(ctx, c, processExitCh) 238 statusCh <- statusAndMetadata{status: Done, pid: pid, reason: "killed", exitCode: 137} 239 } 240 } 241 242 func (e *processExecer) killProcess(ctx context.Context, c *exec.Cmd, processExitCh chan error) { 243 logger.Get(ctx).Debugf("About to gracefully shut down process %d", c.Process.Pid) 244 err := procutil.GracefullyShutdownProcess(c.Process) 245 if err != nil { 246 logger.Get(ctx).Debugf("Unable to gracefully kill process %d, sending SIGKILL to the process group: %v", c.Process.Pid, err) 247 procutil.KillProcessGroup(c) 248 return 249 } 250 251 // we wait 30 seconds to give the process enough time to finish doing any cleanup. 252 // this is the same timeout that Kubernetes uses 253 // TODO(dmiller): make this configurable via the Tiltfile 254 infoCh := time.After(e.gracePeriod / 20) 255 moreInfoCh := time.After(e.gracePeriod / 3) 256 finalCh := time.After(e.gracePeriod) 257 258 select { 259 case <-infoCh: 260 logger.Get(ctx).Infof("Waiting %s for process to exit... (pid: %d)", e.gracePeriod, c.Process.Pid) 261 case <-processExitCh: 262 return 263 } 264 265 select { 266 case <-moreInfoCh: 267 logger.Get(ctx).Infof("Still waiting on exit... (pid: %d)", c.Process.Pid) 268 case <-processExitCh: 269 return 270 } 271 272 select { 273 case <-finalCh: 274 logger.Get(ctx).Infof("Time is up! Sending %d a kill signal", c.Process.Pid) 275 procutil.KillProcessGroup(c) 276 case <-processExitCh: 277 return 278 } 279 }