github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/caasapplicationprovisioner/application.go (about)

     1  // Copyright 2020 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package caasapplicationprovisioner
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/clock"
    11  	"github.com/juju/errors"
    12  	"github.com/juju/names/v5"
    13  	"github.com/juju/utils/v3"
    14  	"github.com/juju/worker/v3"
    15  	"github.com/juju/worker/v3/catacomb"
    16  
    17  	"github.com/juju/juju/caas"
    18  	"github.com/juju/juju/core/life"
    19  	"github.com/juju/juju/core/status"
    20  	"github.com/juju/juju/core/watcher"
    21  	"github.com/juju/juju/rpc/params"
    22  )
    23  
// appNotifyWorker is a worker.Worker that can also be told, via Notify, that
// it should re-examine its application. appWorker in this file implements it.
type appNotifyWorker interface {
	worker.Worker
	// Notify asks the worker to react to a change.
	Notify()
}
    28  
// appWorker manages a single CAAS application: it watches the application's
// life, scale, trust and units, and drives the configured ApplicationOps in
// response (see loop).
//
// When statusOnly is true the worker still watches and refreshes status, but
// loop skips the mutating operations (password setup, AppAlive, AppDying,
// AppDead, EnsureScale, EnsureTrust and ReconcileDeadUnitScale).
type appWorker struct {
	// Lifecycle management and collaborators, copied from AppWorkerConfig.
	// ops defaults to &applicationOps{} when the config leaves it nil.
	catacomb   catacomb.Catacomb
	facade     CAASProvisionerFacade
	broker     CAASBroker
	clock      clock.Clock
	logger     Logger
	unitFacade CAASUnitProvisionerFacade
	ops        ApplicationOps

	// Per-application state.
	//   name        - the application name passed to every facade/ops call.
	//   modelTag    - copied from the config; not read elsewhere in this file.
	//   changes     - buffered (size 1) channel fed by Notify and primed once
	//                 at start-up so the first change is handled immediately.
	//   password    - random password generated once per loop start and
	//                 registered with facade.SetPassword.
	//   lastApplied - handed by pointer to ops.AppAlive; presumably the last
	//                 config applied to the broker (TODO confirm in ops).
	//   life        - the most recently observed life value.
	//   statusOnly  - see the type comment.
	name        string
	modelTag    names.ModelTag
	changes     chan struct{}
	password    string
	lastApplied caas.ApplicationConfig
	life        life.Value
	statusOnly  bool
}
    46  
// AppWorkerConfig holds everything NewAppWorker needs to build an appWorker.
//
// Name is the application to manage. Facade, UnitFacade and Broker are the
// API and provider handles the worker calls. Clock supplies the timers used
// for retries and the periodic status refresh. Ops may be nil, in which case
// NewAppWorker substitutes &applicationOps{}. StatusOnly makes the worker
// skip mutating operations and only watch/report status.
//
// No validation is performed here or in NewAppWorker; callers are expected to
// supply non-nil collaborators.
type AppWorkerConfig struct {
	Name       string
	Facade     CAASProvisionerFacade
	Broker     CAASBroker
	ModelTag   names.ModelTag
	Clock      clock.Clock
	Logger     Logger
	UnitFacade CAASUnitProvisionerFacade
	Ops        ApplicationOps
	StatusOnly bool
}
    58  
// tryAgain is a sentinel error. When an operation invoked from loop returns
// an error matching it (via errors.Is), loop reschedules that operation after
// its retry delay instead of failing the worker.
const tryAgain errors.ConstError = "try again"
    60  
// NewAppWorkerFunc is the signature of NewAppWorker: given a config it returns
// a start function that constructs and launches the worker.
type NewAppWorkerFunc func(AppWorkerConfig) func() (worker.Worker, error)
    62  
    63  func NewAppWorker(config AppWorkerConfig) func() (worker.Worker, error) {
    64  	ops := config.Ops
    65  	if ops == nil {
    66  		ops = &applicationOps{}
    67  	}
    68  	return func() (worker.Worker, error) {
    69  		changes := make(chan struct{}, 1)
    70  		changes <- struct{}{}
    71  		a := &appWorker{
    72  			name:       config.Name,
    73  			facade:     config.Facade,
    74  			broker:     config.Broker,
    75  			modelTag:   config.ModelTag,
    76  			clock:      config.Clock,
    77  			logger:     config.Logger,
    78  			changes:    changes,
    79  			unitFacade: config.UnitFacade,
    80  			ops:        ops,
    81  			statusOnly: config.StatusOnly,
    82  		}
    83  		err := catacomb.Invoke(catacomb.Plan{
    84  			Site: &a.catacomb,
    85  			Work: a.loop,
    86  		})
    87  		return a, err
    88  	}
    89  }
    90  
    91  func (a *appWorker) Notify() {
    92  	select {
    93  	case a.changes <- struct{}{}:
    94  	case <-a.catacomb.Dying():
    95  	}
    96  }
    97  
// Kill is part of the worker.Worker interface. It asks the catacomb to shut
// the worker down without recording an error.
func (a *appWorker) Kill() {
	a.catacomb.Kill(nil)
}
   101  
// Wait is part of the worker.Worker interface. It returns whatever the
// catacomb's Wait returns once the worker has stopped.
func (a *appWorker) Wait() error {
	return a.catacomb.Wait()
}
   105  
// loop is the worker's main routine; NewAppWorker hands it to the catacomb as
// the plan's Work function.
//
// Start-up:
//   - Fetch the application's life. If the application is not found, return
//     nil. If it is already Dead, run the AppDying and AppDead ops (unless
//     statusOnly) and return nil.
//   - Unless statusOnly: call VerifyCharmUpgraded (returning nil if it asks
//     the worker to exit), call UpgradePodSpec, then generate a random
//     password and register it with the facade.
//   - Start the scale, trust-hash and units watchers and add them to the
//     catacomb.
//
// Event loop: each watcher event only schedules work by arming a timer
// channel (scaleChan, trustChan, reconcileDeadChan, stateAppChangedChan); the
// work itself runs when that timer fires. A timer channel left nil is never
// selected, which is how "nothing pending" is represented. Operations that
// fail with NotFound or tryAgain re-arm their timer with retryDelay.
//
// The loop returns nil once handleChange has processed a Dead application,
// the catacomb's ErrDying when the worker is being stopped, or the first
// unrecoverable error from a watcher or operation.
func (a *appWorker) loop() error {
	// TODO(sidecar): support more than statefulset
	app := a.broker.Application(a.name, caas.DeploymentStateful)

	// If the application no longer exists, return immediately. If it's in
	// Dead state, ensure it's deleted and terminated.
	appLife, err := a.facade.Life(a.name)
	if errors.Is(err, errors.NotFound) {
		a.logger.Debugf("application %q no longer exists", a.name)
		return nil
	} else if err != nil {
		return errors.Annotatef(err, "fetching life status for application %q", a.name)
	}
	a.life = appLife
	if appLife == life.Dead {
		if !a.statusOnly {
			err = a.ops.AppDying(a.name, app, a.life, a.facade, a.unitFacade, a.logger)
			if err != nil {
				return errors.Annotatef(err, "deleting application %q", a.name)
			}
			err = a.ops.AppDead(a.name, app, a.broker, a.facade, a.unitFacade, a.clock, a.logger)
			if err != nil {
				return errors.Annotatef(err, "deleting application %q", a.name)
			}
		}
		return nil
	}

	if !a.statusOnly {
		// Ensure the charm is upgraded to a v2 charm (or wait for that).
		shouldExit, err := a.ops.VerifyCharmUpgraded(a.name, a.facade, &a.catacomb, a.logger)
		if err != nil {
			return errors.Trace(err)
		}
		if shouldExit {
			return nil
		}

		err = a.ops.UpgradePodSpec(a.name, a.broker, a.clock, &a.catacomb, a.logger)
		if err != nil {
			return errors.Trace(err)
		}

		// Update the password once per worker start to avoid it changing too frequently.
		a.password, err = utils.RandomPassword()
		if err != nil {
			return errors.Trace(err)
		}
		err = a.facade.SetPassword(a.name, a.password)
		if err != nil {
			return errors.Annotate(err, "failed to set application api passwords")
		}
	}

	// These three channels stay nil (and therefore never fire in the select
	// below) until handleChange starts the corresponding watcher.
	var appChanges watcher.NotifyChannel
	var appProvisionChanges watcher.NotifyChannel
	var replicaChanges watcher.NotifyChannel
	// lastReportedStatus is threaded through successive UpdateState calls.
	var lastReportedStatus map[string]status.StatusInfo

	appScaleWatcher, err := a.unitFacade.WatchApplicationScale(a.name)
	if err != nil {
		return errors.Annotatef(err, "creating application %q scale watcher", a.name)
	}
	if err := a.catacomb.Add(appScaleWatcher); err != nil {
		return errors.Annotatef(err, "failed to watch for application %q scale changes", a.name)
	}

	appTrustWatcher, err := a.unitFacade.WatchApplicationTrustHash(a.name)
	if err != nil {
		return errors.Annotatef(err, "creating application %q trust watcher", a.name)
	}
	if err := a.catacomb.Add(appTrustWatcher); err != nil {
		return errors.Annotatef(err, "failed to watch for application %q trust changes", a.name)
	}

	var appUnitsWatcher watcher.StringsWatcher
	appUnitsWatcher, err = a.facade.WatchUnits(a.name)
	if err != nil {
		return errors.Annotatef(err, "creating application %q units watcher", a.name)
	}
	if err := a.catacomb.Add(appUnitsWatcher); err != nil {
		return errors.Annotatef(err, "failed to watch for application %q units changes", a.name)
	}

	// done is set by handleChange once a Dead application has been dealt
	// with; the main loop checks it after every select and returns nil.
	done := false

	// Scheduling state shared between the main loop and handleChange.
	// initial marks the first handleChange call. Each *Chan is nil while
	// nothing is pending and is set to a clock.After channel to schedule the
	// matching operation. The *Tries counters bound NotFound retries for
	// scale and trust.
	var (
		initial             = true
		scaleChan           <-chan time.Time
		scaleTries          int
		trustChan           <-chan time.Time
		trustTries          int
		reconcileDeadChan   <-chan time.Time
		stateAppChangedChan <-chan time.Time
	)
	// maxRetries caps consecutive NotFound retries for EnsureScale and
	// EnsureTrust; retryDelay is the pause before any rescheduled operation.
	const (
		maxRetries = 20
		retryDelay = 3 * time.Second
	)

	// handleChange re-reads the application's life and acts on it. It writes
	// to the enclosing appLife and err variables as well as a.life, and may
	// start watchers (assigning appProvisionChanges, appChanges and
	// replicaChanges), arm scaleChan/reconcileDeadChan, or set done.
	handleChange := func() error {
		appLife, err = a.facade.Life(a.name)
		if errors.Is(err, errors.NotFound) {
			// An application that has vanished is treated as Dead so the
			// Dead branch below still runs its clean-up.
			appLife = life.Dead
		} else if err != nil {
			return errors.Trace(err)
		}
		a.life = appLife

		if initial {
			// First call only: if the stored provisioning state has Scaling
			// set, either clear it (statusOnly) or schedule an immediate
			// scale and dead-unit reconciliation.
			initial = false
			ps, err := a.facade.ProvisioningState(a.name)
			if err != nil {
				return errors.Trace(err)
			}
			if ps != nil && ps.Scaling {
				if a.statusOnly {
					// Clear provisioning state for status only app.
					err = a.facade.SetProvisioningState(a.name, params.CAASApplicationProvisioningState{})
					if err != nil {
						return errors.Trace(err)
					}
				} else {
					scaleChan = a.clock.After(0)
					reconcileDeadChan = a.clock.After(0)
				}
			}
		}
		switch appLife {
		case life.Alive:
			// Start the provisioning-info watcher once.
			if appProvisionChanges == nil {
				appProvisionWatcher, err := a.facade.WatchProvisioningInfo(a.name)
				if err != nil {
					return errors.Annotatef(err, "failed to watch facade for changes to application provisioning %q", a.name)
				}
				if err := a.catacomb.Add(appProvisionWatcher); err != nil {
					return errors.Trace(err)
				}
				appProvisionChanges = appProvisionWatcher.Changes()
			}
			if !a.statusOnly {
				err = a.ops.AppAlive(a.name, app, a.password, &a.lastApplied, a.facade, a.clock, a.logger)
				if errors.Is(err, errors.NotProvisioned) {
					// State not ready for this application to be provisioned yet.
					// Usually because the charm has not yet been downloaded.
					// The break leaves the switch, so the application and
					// replica watchers below are not started on this pass.
					break
				} else if err != nil {
					return errors.Trace(err)
				}
			}
			// Start the provider-side application and replica watchers once.
			if appChanges == nil {
				appWatcher, err := app.Watch()
				if err != nil {
					return errors.Annotatef(err, "failed to watch for changes to application %q", a.name)
				}
				if err := a.catacomb.Add(appWatcher); err != nil {
					return errors.Trace(err)
				}
				appChanges = appWatcher.Changes()
			}
			if replicaChanges == nil {
				replicaWatcher, err := app.WatchReplicas()
				if err != nil {
					return errors.Annotatef(err, "failed to watch for changes to replicas %q", a.name)
				}
				if err := a.catacomb.Add(replicaWatcher); err != nil {
					return errors.Trace(err)
				}
				replicaChanges = replicaWatcher.Changes()
			}
		case life.Dying:
			if !a.statusOnly {
				err = a.ops.AppDying(a.name, app, a.life, a.facade, a.unitFacade, a.logger)
				if err != nil {
					return errors.Trace(err)
				}
			}
		case life.Dead:
			// Run the dying step before the dead step, then tell the main
			// loop to finish.
			if !a.statusOnly {
				err = a.ops.AppDying(a.name, app, a.life, a.facade, a.unitFacade, a.logger)
				if err != nil {
					return errors.Trace(err)
				}
				err = a.ops.AppDead(a.name, app, a.broker, a.facade, a.unitFacade, a.clock, a.logger)
				if err != nil {
					return errors.Trace(err)
				}
			}
			done = true
			return nil
		default:
			return errors.NotImplementedf("unknown life %q", a.life)
		}
		return nil
	}

	for {
		// shouldRefresh stays true unless the selected case only scheduled
		// or rescheduled work; when true, the application status is
		// refreshed at the end of the iteration.
		shouldRefresh := true
		select {
		case _, ok := <-appScaleWatcher.Changes():
			if !ok {
				return fmt.Errorf("application %q scale watcher closed channel", a.name)
			}
			// Schedule a scale pass unless one is already pending.
			if scaleChan == nil {
				scaleTries = 0
				scaleChan = a.clock.After(0)
			}
			shouldRefresh = false
		case <-scaleChan:
			if a.statusOnly {
				// Nothing to do in status-only mode; just disarm the timer.
				scaleChan = nil
				break
			}
			err := a.ops.EnsureScale(a.name, app, a.life, a.facade, a.unitFacade, a.logger)
			if errors.Is(err, errors.NotFound) {
				// NotFound is retried up to maxRetries times.
				if scaleTries >= maxRetries {
					return errors.Annotatef(err, "more than %d retries ensuring scale", maxRetries)
				}
				scaleTries++
				scaleChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if errors.Is(err, tryAgain) {
				// tryAgain is retried without counting against maxRetries.
				scaleChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if err != nil {
				return errors.Trace(err)
			} else {
				scaleChan = nil
			}
		case _, ok := <-appTrustWatcher.Changes():
			if !ok {
				return fmt.Errorf("application %q trust watcher closed channel", a.name)
			}
			// Schedule a trust pass unless one is already pending.
			if trustChan == nil {
				trustTries = 0
				trustChan = a.clock.After(0)
			}
			shouldRefresh = false
		case <-trustChan:
			if a.statusOnly {
				// Nothing to do in status-only mode; just disarm the timer.
				trustChan = nil
				break
			}
			err := a.ops.EnsureTrust(a.name, app, a.unitFacade, a.logger)
			if errors.Is(err, errors.NotFound) {
				// NotFound is retried up to maxRetries times.
				if trustTries >= maxRetries {
					return errors.Annotatef(err, "more than %d retries ensuring trust", maxRetries)
				}
				trustTries++
				trustChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if err != nil {
				return errors.Trace(err)
			} else {
				trustChan = nil
			}
		case _, ok := <-appUnitsWatcher.Changes():
			if !ok {
				return fmt.Errorf("application %q units watcher closed channel", a.name)
			}
			// Schedule a dead-unit reconciliation unless one is pending.
			if reconcileDeadChan == nil {
				reconcileDeadChan = a.clock.After(0)
			}
			shouldRefresh = false
		case <-reconcileDeadChan:
			if a.statusOnly {
				// Nothing to do in status-only mode; just disarm the timer.
				reconcileDeadChan = nil
				break
			}
			err := a.ops.ReconcileDeadUnitScale(a.name, app, a.facade, a.logger)
			if errors.Is(err, errors.NotFound) {
				// Unlike scale and trust, NotFound here is retried with no
				// retry limit.
				reconcileDeadChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if errors.Is(err, tryAgain) {
				reconcileDeadChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if err != nil {
				return fmt.Errorf("reconciling dead unit scale: %w", err)
			} else {
				reconcileDeadChan = nil
			}
		case <-a.catacomb.Dying():
			return a.catacomb.ErrDying()
		case <-appProvisionChanges:
			// Provisioning info changed: schedule handleChange unless one is
			// already pending.
			if stateAppChangedChan == nil {
				stateAppChangedChan = a.clock.After(0)
			}
			shouldRefresh = false
		case <-a.changes:
			// Notify was called (or this is the signal primed at start-up):
			// schedule handleChange unless one is already pending.
			if stateAppChangedChan == nil {
				stateAppChangedChan = a.clock.After(0)
			}
			shouldRefresh = false
		case <-stateAppChangedChan:
			// Respond to life changes (Notify called by parent worker).
			err = handleChange()
			if errors.Is(err, tryAgain) {
				stateAppChangedChan = a.clock.After(retryDelay)
				shouldRefresh = false
			} else if err != nil {
				return errors.Trace(err)
			} else {
				stateAppChangedChan = nil
			}
		case <-appChanges:
			// Respond to changes in provider application.
			lastReportedStatus, err = a.ops.UpdateState(a.name, app, lastReportedStatus, a.broker, a.facade, a.unitFacade, a.logger)
			if err != nil {
				return errors.Trace(err)
			}
		case <-replicaChanges:
			// Respond to changes in replicas of the application.
			lastReportedStatus, err = a.ops.UpdateState(a.name, app, lastReportedStatus, a.broker, a.facade, a.unitFacade, a.logger)
			if err != nil {
				return errors.Trace(err)
			}
		case <-a.clock.After(10 * time.Second):
			// Force refresh of application status.
			// A fresh timer is created on every iteration, so this fires
			// only after 10 seconds with no other event.
		}
		if done {
			return nil
		}
		if shouldRefresh {
			if err = a.ops.RefreshApplicationStatus(a.name, app, appLife, a.facade, a.logger); err != nil {
				return errors.Annotatef(err, "refreshing application status for %q", a.name)
			}
		}
	}
}