github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/controllers/core/cmd/controller.go (about)

     1  package cmd
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/jonboulle/clockwork"
    12  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    13  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  	"k8s.io/apimachinery/pkg/runtime"
    15  	"k8s.io/apimachinery/pkg/types"
    16  	ctrl "sigs.k8s.io/controller-runtime"
    17  	"sigs.k8s.io/controller-runtime/pkg/builder"
    18  	"sigs.k8s.io/controller-runtime/pkg/client"
    19  	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
    20  	"sigs.k8s.io/controller-runtime/pkg/handler"
    21  
    22  	"github.com/tilt-dev/probe/pkg/probe"
    23  	"github.com/tilt-dev/probe/pkg/prober"
    24  
    25  	"github.com/tilt-dev/tilt/internal/controllers/apicmp"
    26  	"github.com/tilt-dev/tilt/internal/controllers/apis/configmap"
    27  	"github.com/tilt-dev/tilt/internal/controllers/apis/trigger"
    28  	"github.com/tilt-dev/tilt/internal/controllers/indexer"
    29  	"github.com/tilt-dev/tilt/internal/engine/local"
    30  	"github.com/tilt-dev/tilt/internal/store"
    31  	"github.com/tilt-dev/tilt/internal/timecmp"
    32  	"github.com/tilt-dev/tilt/pkg/apis"
    33  	"github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
    34  	"github.com/tilt-dev/tilt/pkg/logger"
    35  	"github.com/tilt-dev/tilt/pkg/model"
    36  )
    37  
    38  // A controller that reads CmdSpec and writes CmdStatus
    39  type Controller struct {
    40  	globalCtx     context.Context
    41  	indexer       *indexer.Indexer
    42  	execer        Execer
    43  	procs         map[types.NamespacedName]*currentProcess
    44  	proberManager ProberManager
    45  	client        ctrlclient.Client
    46  	st            store.RStore
    47  	clock         clockwork.Clock
    48  	requeuer      *indexer.Requeuer
    49  
    50  	mu sync.Mutex
    51  }
    52  
    53  var _ store.TearDowner = &Controller{}
    54  
    55  func (r *Controller) CreateBuilder(mgr ctrl.Manager) (*builder.Builder, error) {
    56  	b := ctrl.NewControllerManagedBy(mgr).
    57  		For(&Cmd{}).
    58  		Watches(&ConfigMap{},
    59  			handler.EnqueueRequestsFromMapFunc(r.indexer.Enqueue)).
    60  		WatchesRawSource(r.requeuer)
    61  
    62  	trigger.SetupControllerStartOn(b, r.indexer, func(obj ctrlclient.Object) *v1alpha1.StartOnSpec {
    63  		return obj.(*v1alpha1.Cmd).Spec.StartOn
    64  	})
    65  	trigger.SetupControllerRestartOn(b, r.indexer, func(obj ctrlclient.Object) *v1alpha1.RestartOnSpec {
    66  		return obj.(*v1alpha1.Cmd).Spec.RestartOn
    67  	})
    68  
    69  	return b, nil
    70  }
    71  
    72  func NewController(ctx context.Context, execer Execer, proberManager ProberManager, client ctrlclient.Client, st store.RStore, clock clockwork.Clock, scheme *runtime.Scheme) *Controller {
    73  	return &Controller{
    74  		globalCtx:     ctx,
    75  		indexer:       indexer.NewIndexer(scheme, indexCmd),
    76  		clock:         clock,
    77  		execer:        execer,
    78  		procs:         make(map[types.NamespacedName]*currentProcess),
    79  		proberManager: proberManager,
    80  		client:        client,
    81  		st:            st,
    82  		requeuer:      indexer.NewRequeuer(),
    83  	}
    84  }
    85  
    86  // Stop the command, and wait for it to finish before continuing.
    87  func (c *Controller) stop(name types.NamespacedName) {
    88  	proc, ok := c.procs[name]
    89  	if !ok {
    90  		return
    91  	}
    92  
    93  	if proc.cancelFunc == nil {
    94  		return
    95  	}
    96  	proc.cancelFunc()
    97  	<-proc.doneCh
    98  	proc.probeWorker = nil
    99  	proc.cancelFunc = nil
   100  	proc.doneCh = nil
   101  }
   102  
   103  func (c *Controller) TearDown(ctx context.Context) {
   104  	for name := range c.procs {
   105  		c.stop(name)
   106  	}
   107  }
   108  
   109  func inputsFromButton(button *v1alpha1.UIButton) []input {
   110  	if button == nil {
   111  		return nil
   112  	}
   113  	statuses := make(map[string]v1alpha1.UIInputStatus)
   114  	for _, status := range button.Status.Inputs {
   115  		statuses[status.Name] = status
   116  	}
   117  
   118  	var ret []input
   119  	for _, spec := range button.Spec.Inputs {
   120  		ret = append(ret, input{
   121  			spec:   spec,
   122  			status: statuses[spec.Name],
   123  		})
   124  	}
   125  
   126  	return ret
   127  }
   128  
   129  type triggerEvents struct {
   130  	lastRestartEventTime metav1.MicroTime
   131  	lastRestartButton    *v1alpha1.UIButton
   132  	lastStartEventTime   metav1.MicroTime
   133  	lastStartButton      *v1alpha1.UIButton
   134  }
   135  
   136  func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
   137  	c.mu.Lock()
   138  	defer c.mu.Unlock()
   139  
   140  	name := req.NamespacedName
   141  	cmd := &Cmd{}
   142  	err := c.client.Get(ctx, name, cmd)
   143  	c.indexer.OnReconcile(name, cmd)
   144  	if err != nil && !apierrors.IsNotFound(err) {
   145  		return ctrl.Result{}, fmt.Errorf("cmd reconcile: %v", err)
   146  	}
   147  
   148  	if apierrors.IsNotFound(err) || cmd.ObjectMeta.DeletionTimestamp != nil {
   149  		c.stop(name)
   150  		delete(c.procs, name)
   151  		return ctrl.Result{}, nil
   152  	}
   153  
   154  	disableStatus, err := configmap.MaybeNewDisableStatus(ctx, c.client, cmd.Spec.DisableSource, cmd.Status.DisableStatus)
   155  	if err != nil {
   156  		return ctrl.Result{}, err
   157  	}
   158  
   159  	proc := c.ensureProc(name)
   160  	proc.mutateStatus(func(status *v1alpha1.CmdStatus) {
   161  		status.DisableStatus = disableStatus
   162  	})
   163  
   164  	disabled := disableStatus.State == v1alpha1.DisableStateDisabled
   165  	if disabled {
   166  		// Disabling should both stop the process, and make it look like
   167  		// it didn't previously run.
   168  		c.stop(name)
   169  		proc.spec = v1alpha1.CmdSpec{}
   170  		proc.lastStartOnEventTime = metav1.MicroTime{}
   171  		proc.lastRestartOnEventTime = metav1.MicroTime{}
   172  	}
   173  
   174  	if cmd.Annotations[v1alpha1.AnnotationManagedBy] == "local_resource" ||
   175  		cmd.Annotations[v1alpha1.AnnotationManagedBy] == "cmd_image" {
   176  		// Until resource dependencies are expressed in the API,
   177  		// we can't use reconciliation to deploy Cmd objects
   178  		// that are part of local_resource or custom_build.
   179  		err := c.maybeUpdateObjectStatus(ctx, cmd)
   180  		if err != nil {
   181  			return ctrl.Result{}, err
   182  		}
   183  
   184  		return ctrl.Result{}, nil
   185  	}
   186  
   187  	var te triggerEvents
   188  	te.lastRestartEventTime, te.lastRestartButton, _, err = trigger.LastRestartEvent(ctx, c.client, cmd.Spec.RestartOn)
   189  	if err != nil {
   190  		return ctrl.Result{}, err
   191  	}
   192  	te.lastStartEventTime, te.lastStartButton, err = trigger.LastStartEvent(ctx, c.client, cmd.Spec.StartOn)
   193  	if err != nil {
   194  		return ctrl.Result{}, err
   195  	}
   196  	startOn := cmd.Spec.StartOn
   197  	waitsOnStartOn := startOn != nil && len(startOn.UIButtons) > 0
   198  
   199  	lastSpec := proc.spec
   200  	lastRestartOnEventTime := proc.lastRestartOnEventTime
   201  	lastStartOnEventTime := proc.lastStartOnEventTime
   202  
   203  	restartOnTriggered := timecmp.After(te.lastRestartEventTime, lastRestartOnEventTime)
   204  	startOnTriggered := timecmp.After(te.lastStartEventTime, lastStartOnEventTime)
   205  	execSpecChanged := !cmdExecEqual(lastSpec, cmd.Spec)
   206  
   207  	if !disabled {
   208  		// any change to the spec means we should stop the command immediately
   209  		if execSpecChanged {
   210  			c.stop(name)
   211  		}
   212  
   213  		if execSpecChanged && waitsOnStartOn && !startOnTriggered {
   214  			// If the cmd spec has changed since the last run,
   215  			// and StartOn hasn't triggered yet, set the status to waiting.
   216  			proc.mutateStatus(func(status *v1alpha1.CmdStatus) {
   217  				status.Waiting = &CmdStateWaiting{
   218  					Reason: waitingOnStartOnReason,
   219  				}
   220  				status.Running = nil
   221  				status.Terminated = nil
   222  				status.Ready = false
   223  			})
   224  		} else if execSpecChanged || restartOnTriggered || startOnTriggered {
   225  			// Otherwise, any change, new start event, or new restart event
   226  			// should restart the process to pick up changes.
   227  			_ = c.runInternal(ctx, cmd, te)
   228  		}
   229  	}
   230  
   231  	err = c.maybeUpdateObjectStatus(ctx, cmd)
   232  	if err != nil {
   233  		return ctrl.Result{}, err
   234  	}
   235  
   236  	return ctrl.Result{}, nil
   237  }
   238  
   239  func (c *Controller) maybeUpdateObjectStatus(ctx context.Context, cmd *v1alpha1.Cmd) error {
   240  	newStatus := c.ensureProc(types.NamespacedName{Name: cmd.Name}).copyStatus()
   241  	if apicmp.DeepEqual(newStatus, cmd.Status) {
   242  		return nil
   243  	}
   244  
   245  	update := cmd.DeepCopy()
   246  	update.Status = newStatus
   247  	err := c.client.Status().Update(ctx, update)
   248  	if err != nil {
   249  		return err
   250  	}
   251  	c.st.Dispatch(local.NewCmdUpdateStatusAction(update))
   252  	return nil
   253  }
   254  
   255  // Forces the command to run now.
   256  //
   257  // This is a hack to get local_resource commands into the API server,
   258  // even though the API server doesn't have a notion of resource deps yet.
   259  //
   260  // Blocks until the command is finished, then returns its status.
   261  func (c *Controller) ForceRun(ctx context.Context, cmd *v1alpha1.Cmd) (*v1alpha1.CmdStatus, error) {
   262  	c.mu.Lock()
   263  	doneCh := c.runInternal(ctx, cmd, triggerEvents{})
   264  	c.mu.Unlock()
   265  
   266  	select {
   267  	case <-ctx.Done():
   268  		return nil, ctx.Err()
   269  	case <-doneCh:
   270  	}
   271  
   272  	c.mu.Lock()
   273  	defer c.mu.Unlock()
   274  	status := c.ensureProc(types.NamespacedName{Name: cmd.Name}).copyStatus()
   275  	return &status, nil
   276  }
   277  
   278  func (i input) stringValue() string {
   279  	if i.status.Text != nil {
   280  		return i.status.Text.Value
   281  	} else if i.status.Bool != nil {
   282  		if i.status.Bool.Value {
   283  			if i.spec.Bool.TrueString != nil {
   284  				return *i.spec.Bool.TrueString
   285  			} else {
   286  				return "true"
   287  			}
   288  		} else {
   289  			if i.spec.Bool.FalseString != nil {
   290  				return *i.spec.Bool.FalseString
   291  			} else {
   292  				return "false"
   293  			}
   294  		}
   295  	} else if i.status.Hidden != nil {
   296  		return i.status.Hidden.Value
   297  	} else if i.status.Choice != nil {
   298  		for _, v := range i.spec.Choice.Choices {
   299  			if v == i.status.Choice.Value {
   300  				return v
   301  			}
   302  		}
   303  		// if value is invalid, we default to the first choice
   304  		return i.spec.Choice.Choices[0]
   305  	}
   306  	return ""
   307  }
   308  
   309  type input struct {
   310  	spec   v1alpha1.UIInputSpec
   311  	status v1alpha1.UIInputStatus
   312  }
   313  
   314  // Ensures there's a current cmd tracker.
   315  func (c *Controller) ensureProc(name types.NamespacedName) *currentProcess {
   316  	proc, ok := c.procs[name]
   317  	if !ok {
   318  		proc = &currentProcess{}
   319  		c.procs[name] = proc
   320  	}
   321  	return proc
   322  }
   323  
   324  // Runs the command unconditionally, stopping any currently running command.
   325  //
   326  // The filewatches and buttons are needed for bookkeeping on how the command
   327  // was triggered.
   328  //
   329  // Returns a channel that closes when the Cmd is finished.
   330  func (c *Controller) runInternal(ctx context.Context,
   331  	cmd *v1alpha1.Cmd,
   332  	te triggerEvents) chan struct{} {
   333  	name := types.NamespacedName{Name: cmd.Name}
   334  	c.stop(name)
   335  
   336  	proc := c.ensureProc(name)
   337  	proc.spec = cmd.Spec
   338  	proc.isServer = cmd.ObjectMeta.Annotations[local.AnnotationOwnerKind] == "CmdServer"
   339  
   340  	proc.lastRestartOnEventTime = te.lastRestartEventTime
   341  	proc.lastStartOnEventTime = te.lastStartEventTime
   342  
   343  	var inputs []input
   344  	if timecmp.After(proc.lastRestartOnEventTime, proc.lastStartOnEventTime) {
   345  		inputs = inputsFromButton(te.lastRestartButton)
   346  	} else {
   347  		inputs = inputsFromButton(te.lastStartButton)
   348  	}
   349  
   350  	ctx, proc.cancelFunc = context.WithCancel(ctx)
   351  	proc.statusMu.Lock()
   352  	defer proc.statusMu.Unlock()
   353  
   354  	status := &(proc.statusInternal)
   355  	status.Running = nil
   356  	status.Waiting = &CmdStateWaiting{}
   357  	status.Terminated = nil
   358  	status.Ready = false
   359  
   360  	ctx = store.MustObjectLogHandler(ctx, c.st, cmd)
   361  	spec := cmd.Spec
   362  
   363  	if spec.ReadinessProbe != nil {
   364  		probeResultFunc := c.handleProbeResultFunc(ctx, name, proc)
   365  		probeWorker, err := probeWorkerFromSpec(
   366  			c.proberManager,
   367  			spec.ReadinessProbe,
   368  			probeResultFunc)
   369  		if err != nil {
   370  			logger.Get(ctx).Errorf("Invalid readiness probe: %v", err)
   371  			status.Terminated = &CmdStateTerminated{
   372  				ExitCode: 1,
   373  				Reason:   fmt.Sprintf("Invalid readiness probe: %v", err),
   374  			}
   375  			status.Waiting = nil
   376  			status.Running = nil
   377  			status.Ready = false
   378  
   379  			proc.doneCh = make(chan struct{})
   380  			close(proc.doneCh)
   381  			return proc.doneCh
   382  		}
   383  		proc.probeWorker = probeWorker
   384  	}
   385  
   386  	startedAt := apis.NewMicroTime(c.clock.Now())
   387  
   388  	env := append([]string{}, spec.Env...)
   389  	for _, input := range inputs {
   390  		env = append(env, fmt.Sprintf("%s=%s", input.spec.Name, input.stringValue()))
   391  	}
   392  
   393  	cmdModel := model.Cmd{
   394  		Argv: spec.Args,
   395  		Dir:  spec.Dir,
   396  		Env:  env,
   397  	}
   398  	statusCh := c.execer.Start(ctx, cmdModel, logger.Get(ctx).Writer(logger.InfoLvl))
   399  	proc.doneCh = make(chan struct{})
   400  
   401  	go c.processStatuses(ctx, statusCh, proc, name, startedAt)
   402  
   403  	return proc.doneCh
   404  }
   405  
   406  func (c *Controller) handleProbeResultFunc(ctx context.Context, name types.NamespacedName, proc *currentProcess) probe.ResultFunc {
   407  	return func(result prober.Result, statusChanged bool, output string, err error) {
   408  		if ctx.Err() != nil {
   409  			return
   410  		}
   411  
   412  		// we try to balance logging important probe results without flooding the logs
   413  		//  * ALL transitions are logged
   414  		// 		* success->{failure,warning} @ WARN
   415  		// 		* {failure,warning}->success @ INFO
   416  		// 	* subsequent non-successful results @ VERBOSE
   417  		// 		* expected healthy/steady-state is recurring success, and this is apparent
   418  		// 		  from the "Ready" state, so logging every invocation is superfluous
   419  		loggerLevel := logger.NoneLvl
   420  		if statusChanged {
   421  			if result != prober.Success {
   422  				loggerLevel = logger.WarnLvl
   423  			} else {
   424  				loggerLevel = logger.InfoLvl
   425  			}
   426  		} else if result != prober.Success {
   427  			loggerLevel = logger.VerboseLvl
   428  		}
   429  		logProbeOutput(ctx, loggerLevel, result, output, nil)
   430  
   431  		if !statusChanged {
   432  			// the probe did not transition states, so the result is logged but not used to update status
   433  			return
   434  		}
   435  
   436  		ready := result == prober.Success || result == prober.Warning
   437  
   438  		// TODO(milas): this isn't quite right - we might end up setting
   439  		// 	a terminated process to ready, for example; in practice, we
   440  		// 	should update internal state on any goroutine/async trackers
   441  		// 	and trigger a reconciliation, which can then evaluate the full
   442  		// 	state + current spec
   443  		proc.statusMu.Lock()
   444  		defer proc.statusMu.Unlock()
   445  
   446  		status := &(proc.statusInternal)
   447  		if status.Ready != ready {
   448  			status.Ready = ready
   449  			c.requeuer.Add(name)
   450  		}
   451  	}
   452  }
   453  
   454  func logProbeOutput(ctx context.Context, level logger.Level, result prober.Result, output string, err error) {
   455  	l := logger.Get(ctx)
   456  	if level == logger.NoneLvl || !l.Level().ShouldDisplay(level) {
   457  		return
   458  	}
   459  
   460  	w := l.Writer(level)
   461  	if err != nil {
   462  		_, _ = fmt.Fprintf(w, "[readiness probe error] %v\n", err)
   463  	} else if output != "" {
   464  		var logMessage strings.Builder
   465  		s := bufio.NewScanner(strings.NewReader(output))
   466  		for s.Scan() {
   467  			logMessage.WriteString("[readiness probe: ")
   468  			logMessage.WriteString(string(result))
   469  			logMessage.WriteString("] ")
   470  			logMessage.Write(s.Bytes())
   471  			logMessage.WriteRune('\n')
   472  		}
   473  		_, _ = io.WriteString(w, logMessage.String())
   474  	}
   475  }
   476  
   477  const waitingOnStartOnReason = "cmd StartOn has not been triggered"
   478  
   479  func (c *Controller) processStatuses(
   480  	ctx context.Context,
   481  	statusCh chan statusAndMetadata,
   482  	proc *currentProcess,
   483  	name types.NamespacedName,
   484  	startedAt metav1.MicroTime) {
   485  	defer close(proc.doneCh)
   486  
   487  	var initProbeWorker sync.Once
   488  
   489  	for sm := range statusCh {
   490  		if sm.status == Unknown {
   491  			continue
   492  		}
   493  
   494  		if sm.status == Error || sm.status == Done {
   495  			// This is a hack until CmdServer is a real object.
   496  			if proc.isServer && sm.exitCode == 0 {
   497  				logger.Get(ctx).Errorf("Server exited with exit code 0")
   498  			}
   499  
   500  			proc.mutateStatus(func(status *v1alpha1.CmdStatus) {
   501  				status.Waiting = nil
   502  				status.Running = nil
   503  				status.Terminated = &CmdStateTerminated{
   504  					PID:        int32(sm.pid),
   505  					Reason:     sm.reason,
   506  					ExitCode:   int32(sm.exitCode),
   507  					StartedAt:  startedAt,
   508  					FinishedAt: apis.NewMicroTime(c.clock.Now()),
   509  				}
   510  			})
   511  			c.requeuer.Add(name)
   512  		} else if sm.status == Running {
   513  			if proc.probeWorker != nil {
   514  				initProbeWorker.Do(func() {
   515  					go proc.probeWorker.Run(ctx)
   516  				})
   517  			}
   518  
   519  			proc.mutateStatus(func(status *v1alpha1.CmdStatus) {
   520  				status.Waiting = nil
   521  				status.Terminated = nil
   522  				status.Running = &CmdStateRunning{
   523  					PID:       int32(sm.pid),
   524  					StartedAt: startedAt,
   525  				}
   526  
   527  				if proc.probeWorker == nil {
   528  					status.Ready = true
   529  				}
   530  			})
   531  			c.requeuer.Add(name)
   532  		}
   533  	}
   534  }
   535  
   536  // Find all the objects we need to watch based on the Cmd model.
   537  func indexCmd(obj client.Object) []indexer.Key {
   538  	cmd := obj.(*v1alpha1.Cmd)
   539  	result := []indexer.Key{}
   540  	if cmd.Spec.DisableSource != nil {
   541  		cm := cmd.Spec.DisableSource.ConfigMap
   542  		if cm != nil {
   543  			gvk := v1alpha1.SchemeGroupVersion.WithKind("ConfigMap")
   544  			result = append(result, indexer.Key{
   545  				Name: types.NamespacedName{Name: cm.Name},
   546  				GVK:  gvk,
   547  			})
   548  		}
   549  	}
   550  	return result
   551  }
   552  
   553  // currentProcess represents the current process for a Manifest, so that Controller can
   554  // make sure there's at most one process per Manifest.
   555  // (note: it may not be running yet, or may have already finished)
   556  type currentProcess struct {
   557  	spec       CmdSpec
   558  	cancelFunc context.CancelFunc
   559  	// closed when the process finishes executing, intentionally or not
   560  	doneCh      chan struct{}
   561  	probeWorker *probe.Worker
   562  	isServer    bool
   563  
   564  	lastRestartOnEventTime metav1.MicroTime
   565  	lastStartOnEventTime   metav1.MicroTime
   566  
   567  	// We have a lock that ONLY protects the status.
   568  	statusMu       sync.Mutex
   569  	statusInternal v1alpha1.CmdStatus
   570  }
   571  
   572  func (p *currentProcess) copyStatus() v1alpha1.CmdStatus {
   573  	p.statusMu.Lock()
   574  	defer p.statusMu.Unlock()
   575  	return *(p.statusInternal.DeepCopy())
   576  }
   577  
   578  func (p *currentProcess) mutateStatus(update func(status *v1alpha1.CmdStatus)) {
   579  	p.statusMu.Lock()
   580  	defer p.statusMu.Unlock()
   581  	update(&p.statusInternal)
   582  }
   583  
   584  type statusAndMetadata struct {
   585  	pid      int
   586  	status   status
   587  	exitCode int
   588  	reason   string
   589  }
   590  
   591  type status int
   592  
   593  const (
   594  	Unknown status = iota
   595  	Running status = iota
   596  	Done    status = iota
   597  	Error   status = iota
   598  )