github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasapplicationprovisioner/ops.go (about)

     1  // Copyright 2023 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package caasapplicationprovisioner
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"reflect"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/juju/charm/v12"
    14  	"github.com/juju/clock"
    15  	"github.com/juju/errors"
    16  	"github.com/juju/names/v5"
    17  	"github.com/juju/retry"
    18  
    19  	"github.com/juju/juju/caas"
    20  	"github.com/juju/juju/cloudconfig/podcfg"
    21  	"github.com/juju/juju/core/life"
    22  	"github.com/juju/juju/core/status"
    23  	"github.com/juju/juju/rpc/params"
    24  )
    25  
    26  // ApplicationOps defines all the operations the application worker can perform.
    27  // This is exported for testing only.
    28  type ApplicationOps interface {
    29  	AppAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig,
    30  		facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error
    31  
    32  	AppDying(appName string, app caas.Application, appLife life.Value,
    33  		facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error
    34  
    35  	AppDead(appName string, app caas.Application,
    36  		broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error
    37  
    38  	VerifyCharmUpgraded(appName string,
    39  		facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error)
    40  
    41  	UpgradePodSpec(appName string,
    42  		broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error
    43  
    44  	EnsureTrust(appName string, app caas.Application,
    45  		unitFacade CAASUnitProvisionerFacade, logger Logger) error
    46  
    47  	UpdateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo,
    48  		broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error)
    49  
    50  	RefreshApplicationStatus(appName string, app caas.Application, appLife life.Value,
    51  		facade CAASProvisionerFacade, logger Logger) error
    52  
    53  	WaitForTerminated(appName string, app caas.Application,
    54  		clk clock.Clock) error
    55  
    56  	ReconcileDeadUnitScale(appName string, app caas.Application,
    57  		facade CAASProvisionerFacade, logger Logger) error
    58  
    59  	EnsureScale(appName string, app caas.Application, appLife life.Value,
    60  		facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error
    61  }
    62  
    63  type applicationOps struct {
    64  }
    65  
    66  func (applicationOps) AppAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig,
    67  	facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error {
    68  	return appAlive(appName, app, password, lastApplied, facade, clk, logger)
    69  }
    70  
    71  func (applicationOps) AppDying(appName string, app caas.Application, appLife life.Value,
    72  	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
    73  	return appDying(appName, app, appLife, facade, unitFacade, logger)
    74  }
    75  
    76  func (applicationOps) AppDead(appName string, app caas.Application,
    77  	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error {
    78  	return appDead(appName, app, broker, facade, unitFacade, clk, logger)
    79  }
    80  
    81  func (applicationOps) VerifyCharmUpgraded(appName string,
    82  	facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error) {
    83  	return verifyCharmUpgraded(appName, facade, tomb, logger)
    84  }
    85  
    86  func (applicationOps) UpgradePodSpec(appName string,
    87  	broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error {
    88  	return upgradePodSpec(appName, broker, clk, tomb, logger)
    89  }
    90  
    91  func (applicationOps) EnsureTrust(appName string, app caas.Application,
    92  	unitFacade CAASUnitProvisionerFacade, logger Logger) error {
    93  	return ensureTrust(appName, app, unitFacade, logger)
    94  }
    95  
    96  func (applicationOps) UpdateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo,
    97  	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error) {
    98  	return updateState(appName, app, lastReportedStatus, broker, facade, unitFacade, logger)
    99  }
   100  
   101  func (applicationOps) RefreshApplicationStatus(appName string, app caas.Application, appLife life.Value,
   102  	facade CAASProvisionerFacade, logger Logger) error {
   103  	return refreshApplicationStatus(appName, app, appLife, facade, logger)
   104  }
   105  
   106  func (applicationOps) WaitForTerminated(appName string, app caas.Application,
   107  	clk clock.Clock) error {
   108  	return waitForTerminated(appName, app, clk)
   109  }
   110  
   111  func (applicationOps) ReconcileDeadUnitScale(appName string, app caas.Application,
   112  	facade CAASProvisionerFacade, logger Logger) error {
   113  	return reconcileDeadUnitScale(appName, app, facade, logger)
   114  }
   115  
   116  func (applicationOps) EnsureScale(appName string, app caas.Application, appLife life.Value,
   117  	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
   118  	return ensureScale(appName, app, appLife, facade, unitFacade, logger)
   119  }
   120  
// Tomb is the shutdown signal the long-running operations in this file
// depend on: they select on Dying while waiting and return ErrDying when it
// fires.
type Tomb interface {
	// Dying returns the channel the operations receive from to notice that
	// they should stop waiting.
	Dying() <-chan struct{}
	// ErrDying returns the error an operation reports when it stops because
	// Dying fired.
	ErrDying() error
}
   125  
   126  // appAlive handles the life.Alive state for the CAAS application. It handles invoking the
   127  // CAAS broker to create the resources in the k8s cluster for this application.
   128  func appAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig,
   129  	facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error {
   130  	logger.Debugf("ensuring application %q exists", appName)
   131  
   132  	provisionInfo, err := facade.ProvisioningInfo(appName)
   133  	if err != nil {
   134  		return errors.Annotate(err, "retrieving provisioning info")
   135  	}
   136  	if provisionInfo.CharmURL == nil {
   137  		return errors.Errorf("missing charm url in provision info")
   138  	}
   139  
   140  	charmInfo, err := facade.CharmInfo(provisionInfo.CharmURL.String())
   141  	if err != nil {
   142  		return errors.Annotatef(err, "retrieving charm deployment info for %q", appName)
   143  	}
   144  
   145  	appState, err := app.Exists()
   146  	if err != nil {
   147  		return errors.Annotatef(err, "retrieving application state for %q", appName)
   148  	}
   149  
   150  	if appState.Exists && appState.Terminating {
   151  		if err := waitForTerminated(appName, app, clk); err != nil {
   152  			return errors.Annotatef(err, "%q was terminating and there was an error waiting for it to stop", appName)
   153  		}
   154  	}
   155  
   156  	images, err := facade.ApplicationOCIResources(appName)
   157  	if err != nil {
   158  		return errors.Annotate(err, "getting OCI image resources")
   159  	}
   160  
   161  	ch := charmInfo.Charm()
   162  	charmBaseImage, err := podcfg.ImageForBase(provisionInfo.ImageDetails.Repository, charm.Base{
   163  		Name: provisionInfo.Base.OS,
   164  		Channel: charm.Channel{
   165  			Track: provisionInfo.Base.Channel.Track,
   166  			Risk:  charm.Risk(provisionInfo.Base.Channel.Risk),
   167  		},
   168  	})
   169  	if err != nil {
   170  		return errors.Annotate(err, "getting image for base")
   171  	}
   172  
   173  	containers := make(map[string]caas.ContainerConfig)
   174  	for k, v := range ch.Meta().Containers {
   175  		container := caas.ContainerConfig{
   176  			Name: k,
   177  			Uid:  v.Uid,
   178  			Gid:  v.Gid,
   179  		}
   180  		if v.Resource == "" {
   181  			return errors.NotValidf("empty container resource reference")
   182  		}
   183  		image, ok := images[v.Resource]
   184  		if !ok {
   185  			return errors.NotFoundf("referenced charm base image resource %s", v.Resource)
   186  		}
   187  		container.Image = image
   188  		for _, m := range v.Mounts {
   189  			container.Mounts = append(container.Mounts, caas.MountConfig{
   190  				StorageName: m.Storage,
   191  				Path:        m.Location,
   192  			})
   193  		}
   194  		containers[k] = container
   195  	}
   196  
   197  	// TODO(sidecar): container.Mounts[*].Path <= consolidate? => provisionInfo.Filesystems[*].Attachment.Path
   198  	config := caas.ApplicationConfig{
   199  		IsPrivateImageRepo:   provisionInfo.ImageDetails.IsPrivate(),
   200  		IntroductionSecret:   password,
   201  		AgentVersion:         provisionInfo.Version,
   202  		AgentImagePath:       provisionInfo.ImageDetails.RegistryPath,
   203  		ControllerAddresses:  strings.Join(provisionInfo.APIAddresses, ","),
   204  		ControllerCertBundle: provisionInfo.CACert,
   205  		ResourceTags:         provisionInfo.Tags,
   206  		Constraints:          provisionInfo.Constraints,
   207  		Filesystems:          provisionInfo.Filesystems,
   208  		Devices:              provisionInfo.Devices,
   209  		CharmBaseImagePath:   charmBaseImage,
   210  		Containers:           containers,
   211  		CharmModifiedVersion: provisionInfo.CharmModifiedVersion,
   212  		Trust:                provisionInfo.Trust,
   213  		InitialScale:         provisionInfo.Scale,
   214  	}
   215  	switch ch.Meta().CharmUser {
   216  	case charm.RunAsDefault, charm.RunAsRoot:
   217  		config.CharmUser = caas.RunAsRoot
   218  	case charm.RunAsSudoer:
   219  		// TODO(pebble): once pebble supports auth, allow running as non-root.
   220  		//config.CharmUser = caas.RunAsSudoer
   221  		config.CharmUser = caas.RunAsRoot
   222  	case charm.RunAsNonRoot:
   223  		// TODO(pebble): once pebble supports auth, allow running as non-root.
   224  		//config.CharmUser = caas.RunAsNonRoot
   225  		config.CharmUser = caas.RunAsRoot
   226  	default:
   227  		return errors.NotValidf("unknown RunAs for CharmUser: %q", ch.Meta().CharmUser)
   228  	}
   229  	reason := "unchanged"
   230  	// TODO(sidecar): implement Equals method for caas.ApplicationConfig
   231  	if !reflect.DeepEqual(config, *lastApplied) {
   232  		if err = app.Ensure(config); err != nil {
   233  			_ = setApplicationStatus(appName, status.Error, err.Error(), nil, facade, logger)
   234  			return errors.Annotatef(err, "ensuring application %q", appName)
   235  		}
   236  		*lastApplied = config
   237  		reason = "deployed"
   238  		if appState.Exists {
   239  			reason = "updated"
   240  		}
   241  	}
   242  	logger.Debugf("application %q was %q", appName, reason)
   243  	return nil
   244  }
   245  
   246  // appDying handles the life.Dying state for the CAAS application. It deals with scaling down
   247  // the application and removing units.
   248  func appDying(appName string, app caas.Application, appLife life.Value,
   249  	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
   250  	logger.Debugf("application %q dying", appName)
   251  	err := ensureScale(appName, app, appLife, facade, unitFacade, logger)
   252  	if err != nil {
   253  		return errors.Annotate(err, "cannot scale dying application to 0")
   254  	}
   255  	err = reconcileDeadUnitScale(appName, app, facade, logger)
   256  	if err != nil {
   257  		return errors.Annotate(err, "cannot reconcile dead units in dying application")
   258  	}
   259  	return nil
   260  }
   261  
   262  // appDead handles the life.Dead state for the CAAS application. It ensures the application
   263  // is removed from the k8s cluster and unblocks the cleanup of the application in state.
   264  func appDead(appName string, app caas.Application,
   265  	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error {
   266  	logger.Debugf("application %q dead", appName)
   267  	err := app.Delete()
   268  	if err != nil {
   269  		return errors.Trace(err)
   270  	}
   271  	err = waitForTerminated(appName, app, clk)
   272  	if err != nil {
   273  		return errors.Trace(err)
   274  	}
   275  	_, err = updateState(appName, app, nil, broker, facade, unitFacade, logger)
   276  	if err != nil {
   277  		return errors.Trace(err)
   278  	}
   279  	// Clear "has-resources" flag so state knows it can now remove the application.
   280  	err = facade.ClearApplicationResources(appName)
   281  	if err != nil {
   282  		return errors.Trace(err)
   283  	}
   284  	return nil
   285  }
   286  
   287  // verifyCharmUpgraded waits till the charm is upgraded to a v2 charm.
   288  func verifyCharmUpgraded(appName string,
   289  	facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error) {
   290  	appStateWatcher, err := facade.WatchApplication(appName)
   291  	if err != nil {
   292  		return false, errors.Annotatef(err, "failed to watch for changes to application %q when verifying charm upgrade", appName)
   293  	}
   294  	defer appStateWatcher.Kill()
   295  
   296  	appStateChanges := appStateWatcher.Changes()
   297  	for {
   298  		charmInfo, err := facade.ApplicationCharmInfo(appName)
   299  		if errors.Is(err, errors.NotFound) {
   300  			logger.Debugf("application %q no longer exists", appName)
   301  			return true, nil
   302  		} else if err != nil {
   303  			return false, errors.Annotatef(err, "failed to get charm info for application %q", appName)
   304  		}
   305  		format := charm.MetaFormat(charmInfo.Charm())
   306  		if format >= charm.FormatV2 {
   307  			logger.Debugf("application %q is now a v2 charm", appName)
   308  			return false, nil
   309  		}
   310  
   311  		appLife, err := facade.Life(appName)
   312  		if errors.Is(err, errors.NotFound) {
   313  			logger.Debugf("application %q no longer exists", appName)
   314  			return true, nil
   315  		} else if err != nil {
   316  			return false, errors.Trace(err)
   317  		}
   318  		if appLife == life.Dead {
   319  			logger.Debugf("application %q now dead", appName)
   320  			return true, nil
   321  		}
   322  
   323  		// Wait for next app change, then loop to check charm format again.
   324  		select {
   325  		case <-appStateChanges:
   326  		case <-tomb.Dying():
   327  			return false, tomb.ErrDying()
   328  		}
   329  	}
   330  }
   331  
   332  // upgradePodSpec checks to see if the application used to be a podspec statefulset charm
   333  // and then to trigger an upgrade and wait for it to complete.
   334  func upgradePodSpec(appName string,
   335  	broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error {
   336  	// If the application has an operator pod due to upgrading the charm from a pod-spec charm
   337  	// to a sidecar charm, delete it. Also delete workload pod.
   338  	const maxDeleteLoops = 20
   339  	for i := 0; ; i++ {
   340  		if i >= maxDeleteLoops {
   341  			return fmt.Errorf("couldn't delete operator and service with %d tries", maxDeleteLoops)
   342  		}
   343  		if i > 0 {
   344  			select {
   345  			case <-clk.After(3 * time.Second):
   346  			case <-tomb.Dying():
   347  				return tomb.ErrDying()
   348  			}
   349  		}
   350  
   351  		exists, err := broker.OperatorExists(appName)
   352  		if err != nil {
   353  			return errors.Annotatef(err, "checking if %q has an operator pod due to upgrading the charm from a pod-spec charm to a sidecar charm", appName)
   354  		}
   355  		if !exists.Exists {
   356  			break
   357  		}
   358  
   359  		logger.Infof("app %q has just been upgraded from a podspec charm to sidecar, now deleting workload and operator pods", appName)
   360  		err = broker.DeleteService(appName)
   361  		if err != nil && !errors.Is(err, errors.NotFound) {
   362  			return errors.Annotatef(err, "deleting workload pod for application %q", appName)
   363  		}
   364  
   365  		// Wait till the units are gone, to ensure worker code isn't messing
   366  		// with old units, only new sidecar pods.
   367  		const maxUnitsLoops = 20
   368  		for j := 0; ; j++ {
   369  			if j >= maxUnitsLoops {
   370  				return fmt.Errorf("pods still present after %d tries", maxUnitsLoops)
   371  			}
   372  			units, err := broker.Units(appName, caas.ModeWorkload)
   373  			if err != nil && !errors.Is(err, errors.NotFound) {
   374  				return errors.Annotatef(err, "fetching workload units for application %q", appName)
   375  			}
   376  			if len(units) == 0 {
   377  				break
   378  			}
   379  			logger.Debugf("%q: waiting for workload pods to be deleted", appName)
   380  			select {
   381  			case <-clk.After(3 * time.Second):
   382  			case <-tomb.Dying():
   383  				return tomb.ErrDying()
   384  			}
   385  		}
   386  
   387  		err = broker.DeleteOperator(appName)
   388  		if err != nil && !errors.Is(err, errors.NotFound) {
   389  			return errors.Annotatef(err, "deleting operator pod for application %q", appName)
   390  		}
   391  	}
   392  	return nil
   393  }
   394  
   395  // ensureTrust updates the applications Trust status on the CAAS broker, giving it
   396  // access to the k8s api via a service account.
   397  func ensureTrust(appName string, app caas.Application,
   398  	unitFacade CAASUnitProvisionerFacade, logger Logger) error {
   399  	desiredTrust, err := unitFacade.ApplicationTrust(appName)
   400  	if err != nil {
   401  		return errors.Annotatef(err, "fetching application %q desired trust", appName)
   402  	}
   403  
   404  	logger.Debugf("updating application %q trust to %v", appName, desiredTrust)
   405  	err = app.Trust(desiredTrust)
   406  	if err != nil {
   407  		return errors.Annotatef(
   408  			err,
   409  			"updating application %q to desired trust %v",
   410  			appName,
   411  			desiredTrust)
   412  	}
   413  	return nil
   414  }
   415  
   416  // updateState reports back information about the CAAS application into state, such as
   417  // status, IP addresses and volume info.
   418  func updateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo,
   419  	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error) {
   420  	appTag := names.NewApplicationTag(appName).String()
   421  	appStatus := params.EntityStatus{}
   422  	svc, err := app.Service()
   423  	if err != nil && !errors.Is(err, errors.NotFound) {
   424  		return nil, errors.Trace(err)
   425  	}
   426  	if svc != nil {
   427  		appStatus = params.EntityStatus{
   428  			Status: svc.Status.Status,
   429  			Info:   svc.Status.Message,
   430  			Data:   svc.Status.Data,
   431  		}
   432  		err = unitFacade.UpdateApplicationService(params.UpdateApplicationServiceArg{
   433  			ApplicationTag: appTag,
   434  			ProviderId:     svc.Id,
   435  			Addresses:      params.FromProviderAddresses(svc.Addresses...),
   436  		})
   437  		if errors.Is(err, errors.NotFound) {
   438  			// Do nothing
   439  		} else if err != nil {
   440  			return nil, errors.Trace(err)
   441  		}
   442  	}
   443  
   444  	units, err := app.Units()
   445  	if err != nil {
   446  		return nil, errors.Trace(err)
   447  	}
   448  
   449  	reportedStatus := make(map[string]status.StatusInfo)
   450  	args := params.UpdateApplicationUnits{
   451  		ApplicationTag: appTag,
   452  		Status:         appStatus,
   453  	}
   454  	for _, u := range units {
   455  		// For pods managed by the substrate, any marked as dying
   456  		// are treated as non-existing.
   457  		if u.Dying {
   458  			continue
   459  		}
   460  		unitStatus := u.Status
   461  		lastStatus, ok := lastReportedStatus[u.Id]
   462  		reportedStatus[u.Id] = unitStatus
   463  		// TODO: Determine a better way to propagate status
   464  		// without constantly overriding the juju state value.
   465  		if ok {
   466  			// If we've seen the same status value previously,
   467  			// report as unknown as this value is ignored.
   468  			if reflect.DeepEqual(lastStatus, unitStatus) {
   469  				unitStatus = status.StatusInfo{
   470  					Status: status.Unknown,
   471  				}
   472  			}
   473  		}
   474  		unitParams := params.ApplicationUnitParams{
   475  			ProviderId: u.Id,
   476  			Address:    u.Address,
   477  			Ports:      u.Ports,
   478  			Stateful:   u.Stateful,
   479  			Status:     unitStatus.Status.String(),
   480  			Info:       unitStatus.Message,
   481  			Data:       unitStatus.Data,
   482  		}
   483  		// Fill in any filesystem info for volumes attached to the unit.
   484  		// A unit will not become active until all required volumes are
   485  		// provisioned, so it makes sense to send this information along
   486  		// with the units to which they are attached.
   487  		for _, info := range u.FilesystemInfo {
   488  			unitParams.FilesystemInfo = append(unitParams.FilesystemInfo, params.KubernetesFilesystemInfo{
   489  				StorageName:  info.StorageName,
   490  				FilesystemId: info.FilesystemId,
   491  				Size:         info.Size,
   492  				MountPoint:   info.MountPoint,
   493  				ReadOnly:     info.ReadOnly,
   494  				Status:       info.Status.Status.String(),
   495  				Info:         info.Status.Message,
   496  				Data:         info.Status.Data,
   497  				Volume: params.KubernetesVolumeInfo{
   498  					VolumeId:   info.Volume.VolumeId,
   499  					Size:       info.Volume.Size,
   500  					Persistent: info.Volume.Persistent,
   501  					Status:     info.Volume.Status.Status.String(),
   502  					Info:       info.Volume.Status.Message,
   503  					Data:       info.Volume.Status.Data,
   504  				},
   505  			})
   506  		}
   507  		args.Units = append(args.Units, unitParams)
   508  	}
   509  
   510  	appUnitInfo, err := facade.UpdateUnits(args)
   511  	if err != nil {
   512  		// We can ignore not found errors as the worker will get stopped anyway.
   513  		// We can also ignore Forbidden errors raised from SetScale because disordered events could happen often.
   514  		if !errors.Is(err, errors.Forbidden) && !errors.Is(err, errors.NotFound) {
   515  			return nil, errors.Trace(err)
   516  		}
   517  		logger.Warningf("update units %v", err)
   518  	}
   519  
   520  	if appUnitInfo != nil {
   521  		for _, unitInfo := range appUnitInfo.Units {
   522  			unit, err := names.ParseUnitTag(unitInfo.UnitTag)
   523  			if err != nil {
   524  				return nil, errors.Trace(err)
   525  			}
   526  			err = broker.AnnotateUnit(appName, caas.ModeSidecar, unitInfo.ProviderId, unit)
   527  			if errors.Is(err, errors.NotFound) {
   528  				continue
   529  			} else if err != nil {
   530  				return nil, errors.Trace(err)
   531  			}
   532  		}
   533  	}
   534  	return reportedStatus, nil
   535  }
   536  
   537  func refreshApplicationStatus(appName string, app caas.Application, appLife life.Value,
   538  	facade CAASProvisionerFacade, logger Logger) error {
   539  	if appLife != life.Alive {
   540  		return nil
   541  	}
   542  	st, err := app.State()
   543  	if errors.Is(err, errors.NotFound) {
   544  		// Do nothing.
   545  		return nil
   546  	} else if err != nil {
   547  		return errors.Trace(err)
   548  	}
   549  
   550  	// refresh the units information.
   551  	units, err := facade.Units(appName)
   552  	if errors.Is(err, errors.NotFound) {
   553  		return nil
   554  	} else if err != nil {
   555  		return errors.Trace(err)
   556  	}
   557  	readyUnitsCount := 0
   558  	for _, unit := range units {
   559  		if unit.UnitStatus.AgentStatus.Status == string(status.Active) {
   560  			readyUnitsCount++
   561  		}
   562  	}
   563  	if st.DesiredReplicas > 0 && st.DesiredReplicas > readyUnitsCount {
   564  		// Only set status to waiting for scale up.
   565  		// When the application gets scaled down, the desired units will be kept running and
   566  		// the application should be active always.
   567  		return setApplicationStatus(appName, status.Waiting, "waiting for units to settle down", nil, facade, logger)
   568  	}
   569  	return setApplicationStatus(appName, status.Active, "", nil, facade, logger)
   570  }
   571  
   572  func waitForTerminated(appName string, app caas.Application,
   573  	clk clock.Clock) error {
   574  	existsFunc := func() error {
   575  		appState, err := app.Exists()
   576  		if err != nil {
   577  			return errors.Trace(err)
   578  		}
   579  		if !appState.Exists {
   580  			return nil
   581  		}
   582  		if appState.Exists && !appState.Terminating {
   583  			return errors.Errorf("application %q should be terminating but is now running", appName)
   584  		}
   585  		return tryAgain
   586  	}
   587  	retryCallArgs := retry.CallArgs{
   588  		Attempts:    60,
   589  		Delay:       3 * time.Second,
   590  		MaxDuration: 3 * time.Minute,
   591  		Clock:       clk,
   592  		Func:        existsFunc,
   593  		IsFatalError: func(err error) bool {
   594  			return !errors.Is(err, tryAgain)
   595  		},
   596  	}
   597  	return errors.Trace(retry.Call(retryCallArgs))
   598  }
   599  
   600  // reconcileDeadUnitScale is setup to respond to CAAS sidecard units that become
   601  // dead. It takes stock of what the current desired scale is for the application
   602  // and the number of dead units in the application. Once the number of dead units
   603  // has reached the a point where the desired scale has been achieved this func
   604  // can go ahead and removed the units from CAAS provider.
   605  func reconcileDeadUnitScale(appName string, app caas.Application,
   606  	facade CAASProvisionerFacade, logger Logger) error {
   607  	units, err := facade.Units(appName)
   608  	if err != nil {
   609  		return fmt.Errorf("getting units for application %s: %w", appName, err)
   610  	}
   611  
   612  	ps, err := facade.ProvisioningState(appName)
   613  	if err != nil {
   614  		return errors.Trace(err)
   615  	}
   616  	if ps == nil || !ps.Scaling {
   617  		return nil
   618  	}
   619  
   620  	desiredScale := ps.ScaleTarget
   621  	unitsToRemove := len(units) - desiredScale
   622  
   623  	var deadUnits []params.CAASUnit
   624  	for _, unit := range units {
   625  		unitLife, err := facade.Life(unit.Tag.Id())
   626  		if err != nil {
   627  			return fmt.Errorf("getting life for unit %q: %w", unit.Tag, err)
   628  		}
   629  		if unitLife == life.Dead {
   630  			deadUnits = append(deadUnits, unit)
   631  		}
   632  	}
   633  
   634  	if unitsToRemove <= 0 {
   635  		unitsToRemove = len(deadUnits)
   636  	}
   637  
   638  	// We haven't met the threshold to initiate scale down in the CAAS provider
   639  	// yet.
   640  	if unitsToRemove != len(deadUnits) {
   641  		return nil
   642  	}
   643  
   644  	logger.Infof("scaling application %q to desired scale %d", appName, desiredScale)
   645  	if err := app.Scale(desiredScale); err != nil && !errors.Is(err, errors.NotFound) {
   646  		return fmt.Errorf(
   647  			"scaling application %q to scale %d: %w",
   648  			appName,
   649  			desiredScale,
   650  			err,
   651  		)
   652  	}
   653  
   654  	appState, err := app.State()
   655  	if err != nil && !errors.Is(err, errors.NotFound) {
   656  		return err
   657  	}
   658  	// TODO: stop k8s things from mutating the statefulset.
   659  	if len(appState.Replicas) > desiredScale {
   660  		return tryAgain
   661  	}
   662  
   663  	for _, deadUnit := range deadUnits {
   664  		logger.Infof("removing dead unit %s", deadUnit.Tag.Id())
   665  		if err := facade.RemoveUnit(deadUnit.Tag.Id()); err != nil && !errors.Is(err, errors.NotFound) {
   666  			return fmt.Errorf("removing dead unit %q: %w", deadUnit.Tag.Id(), err)
   667  		}
   668  	}
   669  
   670  	return updateProvisioningState(appName, false, 0, facade)
   671  }
   672  
   673  // ensureScale determines how and when to scale up or down based on
   674  // current scale targets that have yet to be met.
   675  func ensureScale(appName string, app caas.Application, appLife life.Value,
   676  	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
   677  	var err error
   678  	var desiredScale int
   679  	switch appLife {
   680  	case life.Alive:
   681  		desiredScale, err = unitFacade.ApplicationScale(appName)
   682  		if err != nil {
   683  			return errors.Annotatef(err, "fetching application %q desired scale", appName)
   684  		}
   685  	case life.Dying, life.Dead:
   686  		desiredScale = 0
   687  	default:
   688  		return errors.NotImplementedf("unknown life %q", appLife)
   689  	}
   690  
   691  	ps, err := facade.ProvisioningState(appName)
   692  	if err != nil {
   693  		return errors.Trace(err)
   694  	}
   695  	if ps == nil {
   696  		ps = &params.CAASApplicationProvisioningState{}
   697  	}
   698  
   699  	logger.Debugf("updating application %q scale to %d", appName, desiredScale)
   700  	if !ps.Scaling || appLife != life.Alive {
   701  		err := updateProvisioningState(appName, true, desiredScale, facade)
   702  		if err != nil {
   703  			return err
   704  		}
   705  		ps.Scaling = true
   706  		ps.ScaleTarget = desiredScale
   707  	}
   708  
   709  	units, err := facade.Units(appName)
   710  	if err != nil {
   711  		return err
   712  	}
   713  	if ps.ScaleTarget >= len(units) {
   714  		logger.Infof("scaling application %q to desired scale %d", appName, ps.ScaleTarget)
   715  		err = app.Scale(ps.ScaleTarget)
   716  		if appLife != life.Alive && errors.Is(err, errors.NotFound) {
   717  			logger.Infof("dying application %q is already removed", appName)
   718  		} else if err != nil {
   719  			return err
   720  		}
   721  		return updateProvisioningState(appName, false, 0, facade)
   722  	}
   723  
   724  	unitsToDestroy, err := app.UnitsToRemove(context.TODO(), ps.ScaleTarget)
   725  	if err != nil && errors.Is(err, errors.NotFound) {
   726  		return nil
   727  	} else if err != nil {
   728  		return fmt.Errorf("scaling application %q to desired scale %d: %w",
   729  			appName, ps.ScaleTarget, err)
   730  	}
   731  
   732  	if len(unitsToDestroy) > 0 {
   733  		if err := facade.DestroyUnits(unitsToDestroy); err != nil {
   734  			return errors.Trace(err)
   735  		}
   736  	}
   737  
   738  	if ps.ScaleTarget != desiredScale {
   739  		// if the current scale target doesn't equal the desired scale
   740  		// we need to rerun this.
   741  		logger.Debugf("application %q currently scaling to %d but desired scale is %d", appName, ps.ScaleTarget, desiredScale)
   742  		return tryAgain
   743  	}
   744  
   745  	return nil
   746  }
   747  
   748  func setApplicationStatus(appName string, s status.Status, reason string, data map[string]interface{},
   749  	facade CAASProvisionerFacade, logger Logger) error {
   750  	logger.Tracef("updating application %q status to %q, %q, %v", appName, s, reason, data)
   751  	return facade.SetOperatorStatus(appName, s, reason, data)
   752  }
   753  
   754  func updateProvisioningState(appName string, scaling bool, scaleTarget int,
   755  	facade CAASProvisionerFacade) error {
   756  	newPs := params.CAASApplicationProvisioningState{
   757  		Scaling:     scaling,
   758  		ScaleTarget: scaleTarget,
   759  	}
   760  	err := facade.SetProvisioningState(appName, newPs)
   761  	if params.IsCodeTryAgain(err) {
   762  		return tryAgain
   763  	} else if err != nil {
   764  		return errors.Annotatef(err, "setting provisiong state for application %q", appName)
   765  	}
   766  	return nil
   767  }