// Source: github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasapplicationprovisioner/ops.go

// Copyright 2023 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package caasapplicationprovisioner

import (
	"context"
	"fmt"
	"reflect"
	"strings"
	"time"

	"github.com/juju/charm/v12"
	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/names/v5"
	"github.com/juju/retry"

	"github.com/juju/juju/caas"
	"github.com/juju/juju/cloudconfig/podcfg"
	"github.com/juju/juju/core/life"
	"github.com/juju/juju/core/status"
	"github.com/juju/juju/rpc/params"
)

// ApplicationOps defines all the operations the application worker can perform.
// This is exported for testing only.
type ApplicationOps interface {
	// AppAlive handles the life.Alive state: it assembles the desired
	// caas.ApplicationConfig from the facade and ensures it on app when it
	// differs from *lastApplied, which is updated on success.
	AppAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig,
		facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error

	// AppDying handles the life.Dying state: it scales the application
	// and then reconciles dead units.
	AppDying(appName string, app caas.Application, appLife life.Value,
		facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error

	// AppDead handles the life.Dead state: it deletes app, waits for it to
	// terminate, reports the final state and clears the application's
	// resources flag through the facade.
	AppDead(appName string, app caas.Application,
		broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error

	// VerifyCharmUpgraded blocks until the application's charm is at least
	// format v2. shouldExit is true when the application is found to be
	// gone or dead while waiting.
	VerifyCharmUpgraded(appName string,
		facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error)

	// UpgradePodSpec removes the operator and workload left behind by a
	// pod-spec charm, retrying until the operator no longer exists.
	UpgradePodSpec(appName string,
		broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error

	// EnsureTrust applies the trust setting reported by unitFacade to app.
	EnsureTrust(appName string, app caas.Application,
		unitFacade CAASUnitProvisionerFacade, logger Logger) error

	// UpdateState reports the service and unit information of app back
	// through the facades and returns the unit statuses it observed, keyed
	// by unit provider id.
	UpdateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo,
		broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error)

	// RefreshApplicationStatus sets the application status to waiting or
	// active depending on how many units are active; it does nothing
	// unless appLife is life.Alive.
	RefreshApplicationStatus(appName string, app caas.Application, appLife life.Value,
		facade CAASProvisionerFacade, logger Logger) error

	// WaitForTerminated polls app until it no longer exists.
	WaitForTerminated(appName string, app caas.Application,
		clk clock.Clock) error

	// ReconcileDeadUnitScale scales app down and removes dead units once
	// enough units are dead to reach the current scale target.
	ReconcileDeadUnitScale(appName string, app caas.Application,
		facade CAASProvisionerFacade, logger Logger) error

	// EnsureScale records the scale target and scales app up, or requests
	// destruction of surplus units when scaling down.
	EnsureScale(appName string, app caas.Application, appLife life.Value,
		facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error
}

// applicationOps is the ApplicationOps implementation in this file; every
// method forwards to the package-level function of the same (lower-cased)
// name.
type applicationOps struct {
}

// AppAlive implements ApplicationOps by calling appAlive.
func (applicationOps) AppAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig,
	facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error {
	return appAlive(appName, app, password, lastApplied, facade, clk, logger)
}

// AppDying implements ApplicationOps by calling appDying.
func (applicationOps) AppDying(appName string, app caas.Application, appLife life.Value,
	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
	return appDying(appName, app, appLife, facade, unitFacade, logger)
}

// AppDead implements ApplicationOps by calling appDead.
func (applicationOps) AppDead(appName string, app caas.Application,
	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error {
	return appDead(appName, app, broker, facade, unitFacade, clk, logger)
}

// VerifyCharmUpgraded implements ApplicationOps by calling verifyCharmUpgraded.
func (applicationOps) VerifyCharmUpgraded(appName string,
	facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error) {
	return verifyCharmUpgraded(appName, facade, tomb, logger)
}

// UpgradePodSpec implements ApplicationOps by calling upgradePodSpec.
func (applicationOps) UpgradePodSpec(appName string,
	broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error {
	return upgradePodSpec(appName, broker, clk, tomb, logger)
}

// EnsureTrust implements ApplicationOps by calling ensureTrust.
func (applicationOps) EnsureTrust(appName string, app caas.Application,
	unitFacade CAASUnitProvisionerFacade, logger Logger) error {
	return ensureTrust(appName, app, unitFacade, logger)
}

// UpdateState implements ApplicationOps by calling updateState.
func (applicationOps) UpdateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo,
	broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error) {
	return updateState(appName, app, lastReportedStatus, broker, facade, unitFacade, logger)
}

// RefreshApplicationStatus implements ApplicationOps by calling
// refreshApplicationStatus.
func (applicationOps) RefreshApplicationStatus(appName string, app caas.Application, appLife life.Value,
	facade CAASProvisionerFacade, logger Logger) error {
	return refreshApplicationStatus(appName, app, appLife, facade, logger)
}

// WaitForTerminated implements ApplicationOps by calling waitForTerminated.
func (applicationOps) WaitForTerminated(appName string, app caas.Application,
	clk clock.Clock) error {
	return waitForTerminated(appName, app, clk)
}

// ReconcileDeadUnitScale implements ApplicationOps by calling
// reconcileDeadUnitScale.
func (applicationOps) ReconcileDeadUnitScale(appName string, app caas.Application,
	facade CAASProvisionerFacade, logger Logger) error {
	return reconcileDeadUnitScale(appName, app, facade, logger)
}

// EnsureScale implements ApplicationOps by calling ensureScale.
func (applicationOps) EnsureScale(appName string, app caas.Application, appLife life.Value,
	facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error {
	return ensureScale(appName, app, appLife, facade, unitFacade, logger)
}

// Tomb is the part of the worker's lifecycle the operations in this file
// depend on: Dying is selected on to notice shutdown while waiting, and
// ErrDying supplies the error returned when that happens.
type Tomb interface {
	Dying() <-chan struct{}
	ErrDying() error
}

// appAlive handles the life.Alive state for the CAAS application. It handles invoking the
// CAAS broker to create the resources in the k8s cluster for this application.
128 func appAlive(appName string, app caas.Application, password string, lastApplied *caas.ApplicationConfig, 129 facade CAASProvisionerFacade, clk clock.Clock, logger Logger) error { 130 logger.Debugf("ensuring application %q exists", appName) 131 132 provisionInfo, err := facade.ProvisioningInfo(appName) 133 if err != nil { 134 return errors.Annotate(err, "retrieving provisioning info") 135 } 136 if provisionInfo.CharmURL == nil { 137 return errors.Errorf("missing charm url in provision info") 138 } 139 140 charmInfo, err := facade.CharmInfo(provisionInfo.CharmURL.String()) 141 if err != nil { 142 return errors.Annotatef(err, "retrieving charm deployment info for %q", appName) 143 } 144 145 appState, err := app.Exists() 146 if err != nil { 147 return errors.Annotatef(err, "retrieving application state for %q", appName) 148 } 149 150 if appState.Exists && appState.Terminating { 151 if err := waitForTerminated(appName, app, clk); err != nil { 152 return errors.Annotatef(err, "%q was terminating and there was an error waiting for it to stop", appName) 153 } 154 } 155 156 images, err := facade.ApplicationOCIResources(appName) 157 if err != nil { 158 return errors.Annotate(err, "getting OCI image resources") 159 } 160 161 ch := charmInfo.Charm() 162 charmBaseImage, err := podcfg.ImageForBase(provisionInfo.ImageDetails.Repository, charm.Base{ 163 Name: provisionInfo.Base.OS, 164 Channel: charm.Channel{ 165 Track: provisionInfo.Base.Channel.Track, 166 Risk: charm.Risk(provisionInfo.Base.Channel.Risk), 167 }, 168 }) 169 if err != nil { 170 return errors.Annotate(err, "getting image for base") 171 } 172 173 containers := make(map[string]caas.ContainerConfig) 174 for k, v := range ch.Meta().Containers { 175 container := caas.ContainerConfig{ 176 Name: k, 177 Uid: v.Uid, 178 Gid: v.Gid, 179 } 180 if v.Resource == "" { 181 return errors.NotValidf("empty container resource reference") 182 } 183 image, ok := images[v.Resource] 184 if !ok { 185 return 
errors.NotFoundf("referenced charm base image resource %s", v.Resource) 186 } 187 container.Image = image 188 for _, m := range v.Mounts { 189 container.Mounts = append(container.Mounts, caas.MountConfig{ 190 StorageName: m.Storage, 191 Path: m.Location, 192 }) 193 } 194 containers[k] = container 195 } 196 197 // TODO(sidecar): container.Mounts[*].Path <= consolidate? => provisionInfo.Filesystems[*].Attachment.Path 198 config := caas.ApplicationConfig{ 199 IsPrivateImageRepo: provisionInfo.ImageDetails.IsPrivate(), 200 IntroductionSecret: password, 201 AgentVersion: provisionInfo.Version, 202 AgentImagePath: provisionInfo.ImageDetails.RegistryPath, 203 ControllerAddresses: strings.Join(provisionInfo.APIAddresses, ","), 204 ControllerCertBundle: provisionInfo.CACert, 205 ResourceTags: provisionInfo.Tags, 206 Constraints: provisionInfo.Constraints, 207 Filesystems: provisionInfo.Filesystems, 208 Devices: provisionInfo.Devices, 209 CharmBaseImagePath: charmBaseImage, 210 Containers: containers, 211 CharmModifiedVersion: provisionInfo.CharmModifiedVersion, 212 Trust: provisionInfo.Trust, 213 InitialScale: provisionInfo.Scale, 214 } 215 switch ch.Meta().CharmUser { 216 case charm.RunAsDefault, charm.RunAsRoot: 217 config.CharmUser = caas.RunAsRoot 218 case charm.RunAsSudoer: 219 // TODO(pebble): once pebble supports auth, allow running as non-root. 220 //config.CharmUser = caas.RunAsSudoer 221 config.CharmUser = caas.RunAsRoot 222 case charm.RunAsNonRoot: 223 // TODO(pebble): once pebble supports auth, allow running as non-root. 
224 //config.CharmUser = caas.RunAsNonRoot 225 config.CharmUser = caas.RunAsRoot 226 default: 227 return errors.NotValidf("unknown RunAs for CharmUser: %q", ch.Meta().CharmUser) 228 } 229 reason := "unchanged" 230 // TODO(sidecar): implement Equals method for caas.ApplicationConfig 231 if !reflect.DeepEqual(config, *lastApplied) { 232 if err = app.Ensure(config); err != nil { 233 _ = setApplicationStatus(appName, status.Error, err.Error(), nil, facade, logger) 234 return errors.Annotatef(err, "ensuring application %q", appName) 235 } 236 *lastApplied = config 237 reason = "deployed" 238 if appState.Exists { 239 reason = "updated" 240 } 241 } 242 logger.Debugf("application %q was %q", appName, reason) 243 return nil 244 } 245 246 // appDying handles the life.Dying state for the CAAS application. It deals with scaling down 247 // the application and removing units. 248 func appDying(appName string, app caas.Application, appLife life.Value, 249 facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error { 250 logger.Debugf("application %q dying", appName) 251 err := ensureScale(appName, app, appLife, facade, unitFacade, logger) 252 if err != nil { 253 return errors.Annotate(err, "cannot scale dying application to 0") 254 } 255 err = reconcileDeadUnitScale(appName, app, facade, logger) 256 if err != nil { 257 return errors.Annotate(err, "cannot reconcile dead units in dying application") 258 } 259 return nil 260 } 261 262 // appDead handles the life.Dead state for the CAAS application. It ensures the application 263 // is removed from the k8s cluster and unblocks the cleanup of the application in state. 
264 func appDead(appName string, app caas.Application, 265 broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, clk clock.Clock, logger Logger) error { 266 logger.Debugf("application %q dead", appName) 267 err := app.Delete() 268 if err != nil { 269 return errors.Trace(err) 270 } 271 err = waitForTerminated(appName, app, clk) 272 if err != nil { 273 return errors.Trace(err) 274 } 275 _, err = updateState(appName, app, nil, broker, facade, unitFacade, logger) 276 if err != nil { 277 return errors.Trace(err) 278 } 279 // Clear "has-resources" flag so state knows it can now remove the application. 280 err = facade.ClearApplicationResources(appName) 281 if err != nil { 282 return errors.Trace(err) 283 } 284 return nil 285 } 286 287 // verifyCharmUpgraded waits till the charm is upgraded to a v2 charm. 288 func verifyCharmUpgraded(appName string, 289 facade CAASProvisionerFacade, tomb Tomb, logger Logger) (shouldExit bool, err error) { 290 appStateWatcher, err := facade.WatchApplication(appName) 291 if err != nil { 292 return false, errors.Annotatef(err, "failed to watch for changes to application %q when verifying charm upgrade", appName) 293 } 294 defer appStateWatcher.Kill() 295 296 appStateChanges := appStateWatcher.Changes() 297 for { 298 charmInfo, err := facade.ApplicationCharmInfo(appName) 299 if errors.Is(err, errors.NotFound) { 300 logger.Debugf("application %q no longer exists", appName) 301 return true, nil 302 } else if err != nil { 303 return false, errors.Annotatef(err, "failed to get charm info for application %q", appName) 304 } 305 format := charm.MetaFormat(charmInfo.Charm()) 306 if format >= charm.FormatV2 { 307 logger.Debugf("application %q is now a v2 charm", appName) 308 return false, nil 309 } 310 311 appLife, err := facade.Life(appName) 312 if errors.Is(err, errors.NotFound) { 313 logger.Debugf("application %q no longer exists", appName) 314 return true, nil 315 } else if err != nil { 316 return false, 
errors.Trace(err) 317 } 318 if appLife == life.Dead { 319 logger.Debugf("application %q now dead", appName) 320 return true, nil 321 } 322 323 // Wait for next app change, then loop to check charm format again. 324 select { 325 case <-appStateChanges: 326 case <-tomb.Dying(): 327 return false, tomb.ErrDying() 328 } 329 } 330 } 331 332 // upgradePodSpec checks to see if the application used to be a podspec statefulset charm 333 // and then to trigger an upgrade and wait for it to complete. 334 func upgradePodSpec(appName string, 335 broker CAASBroker, clk clock.Clock, tomb Tomb, logger Logger) error { 336 // If the application has an operator pod due to upgrading the charm from a pod-spec charm 337 // to a sidecar charm, delete it. Also delete workload pod. 338 const maxDeleteLoops = 20 339 for i := 0; ; i++ { 340 if i >= maxDeleteLoops { 341 return fmt.Errorf("couldn't delete operator and service with %d tries", maxDeleteLoops) 342 } 343 if i > 0 { 344 select { 345 case <-clk.After(3 * time.Second): 346 case <-tomb.Dying(): 347 return tomb.ErrDying() 348 } 349 } 350 351 exists, err := broker.OperatorExists(appName) 352 if err != nil { 353 return errors.Annotatef(err, "checking if %q has an operator pod due to upgrading the charm from a pod-spec charm to a sidecar charm", appName) 354 } 355 if !exists.Exists { 356 break 357 } 358 359 logger.Infof("app %q has just been upgraded from a podspec charm to sidecar, now deleting workload and operator pods", appName) 360 err = broker.DeleteService(appName) 361 if err != nil && !errors.Is(err, errors.NotFound) { 362 return errors.Annotatef(err, "deleting workload pod for application %q", appName) 363 } 364 365 // Wait till the units are gone, to ensure worker code isn't messing 366 // with old units, only new sidecar pods. 
367 const maxUnitsLoops = 20 368 for j := 0; ; j++ { 369 if j >= maxUnitsLoops { 370 return fmt.Errorf("pods still present after %d tries", maxUnitsLoops) 371 } 372 units, err := broker.Units(appName, caas.ModeWorkload) 373 if err != nil && !errors.Is(err, errors.NotFound) { 374 return errors.Annotatef(err, "fetching workload units for application %q", appName) 375 } 376 if len(units) == 0 { 377 break 378 } 379 logger.Debugf("%q: waiting for workload pods to be deleted", appName) 380 select { 381 case <-clk.After(3 * time.Second): 382 case <-tomb.Dying(): 383 return tomb.ErrDying() 384 } 385 } 386 387 err = broker.DeleteOperator(appName) 388 if err != nil && !errors.Is(err, errors.NotFound) { 389 return errors.Annotatef(err, "deleting operator pod for application %q", appName) 390 } 391 } 392 return nil 393 } 394 395 // ensureTrust updates the applications Trust status on the CAAS broker, giving it 396 // access to the k8s api via a service account. 397 func ensureTrust(appName string, app caas.Application, 398 unitFacade CAASUnitProvisionerFacade, logger Logger) error { 399 desiredTrust, err := unitFacade.ApplicationTrust(appName) 400 if err != nil { 401 return errors.Annotatef(err, "fetching application %q desired trust", appName) 402 } 403 404 logger.Debugf("updating application %q trust to %v", appName, desiredTrust) 405 err = app.Trust(desiredTrust) 406 if err != nil { 407 return errors.Annotatef( 408 err, 409 "updating application %q to desired trust %v", 410 appName, 411 desiredTrust) 412 } 413 return nil 414 } 415 416 // updateState reports back information about the CAAS application into state, such as 417 // status, IP addresses and volume info. 
418 func updateState(appName string, app caas.Application, lastReportedStatus map[string]status.StatusInfo, 419 broker CAASBroker, facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) (map[string]status.StatusInfo, error) { 420 appTag := names.NewApplicationTag(appName).String() 421 appStatus := params.EntityStatus{} 422 svc, err := app.Service() 423 if err != nil && !errors.Is(err, errors.NotFound) { 424 return nil, errors.Trace(err) 425 } 426 if svc != nil { 427 appStatus = params.EntityStatus{ 428 Status: svc.Status.Status, 429 Info: svc.Status.Message, 430 Data: svc.Status.Data, 431 } 432 err = unitFacade.UpdateApplicationService(params.UpdateApplicationServiceArg{ 433 ApplicationTag: appTag, 434 ProviderId: svc.Id, 435 Addresses: params.FromProviderAddresses(svc.Addresses...), 436 }) 437 if errors.Is(err, errors.NotFound) { 438 // Do nothing 439 } else if err != nil { 440 return nil, errors.Trace(err) 441 } 442 } 443 444 units, err := app.Units() 445 if err != nil { 446 return nil, errors.Trace(err) 447 } 448 449 reportedStatus := make(map[string]status.StatusInfo) 450 args := params.UpdateApplicationUnits{ 451 ApplicationTag: appTag, 452 Status: appStatus, 453 } 454 for _, u := range units { 455 // For pods managed by the substrate, any marked as dying 456 // are treated as non-existing. 457 if u.Dying { 458 continue 459 } 460 unitStatus := u.Status 461 lastStatus, ok := lastReportedStatus[u.Id] 462 reportedStatus[u.Id] = unitStatus 463 // TODO: Determine a better way to propagate status 464 // without constantly overriding the juju state value. 465 if ok { 466 // If we've seen the same status value previously, 467 // report as unknown as this value is ignored. 
468 if reflect.DeepEqual(lastStatus, unitStatus) { 469 unitStatus = status.StatusInfo{ 470 Status: status.Unknown, 471 } 472 } 473 } 474 unitParams := params.ApplicationUnitParams{ 475 ProviderId: u.Id, 476 Address: u.Address, 477 Ports: u.Ports, 478 Stateful: u.Stateful, 479 Status: unitStatus.Status.String(), 480 Info: unitStatus.Message, 481 Data: unitStatus.Data, 482 } 483 // Fill in any filesystem info for volumes attached to the unit. 484 // A unit will not become active until all required volumes are 485 // provisioned, so it makes sense to send this information along 486 // with the units to which they are attached. 487 for _, info := range u.FilesystemInfo { 488 unitParams.FilesystemInfo = append(unitParams.FilesystemInfo, params.KubernetesFilesystemInfo{ 489 StorageName: info.StorageName, 490 FilesystemId: info.FilesystemId, 491 Size: info.Size, 492 MountPoint: info.MountPoint, 493 ReadOnly: info.ReadOnly, 494 Status: info.Status.Status.String(), 495 Info: info.Status.Message, 496 Data: info.Status.Data, 497 Volume: params.KubernetesVolumeInfo{ 498 VolumeId: info.Volume.VolumeId, 499 Size: info.Volume.Size, 500 Persistent: info.Volume.Persistent, 501 Status: info.Volume.Status.Status.String(), 502 Info: info.Volume.Status.Message, 503 Data: info.Volume.Status.Data, 504 }, 505 }) 506 } 507 args.Units = append(args.Units, unitParams) 508 } 509 510 appUnitInfo, err := facade.UpdateUnits(args) 511 if err != nil { 512 // We can ignore not found errors as the worker will get stopped anyway. 513 // We can also ignore Forbidden errors raised from SetScale because disordered events could happen often. 
514 if !errors.Is(err, errors.Forbidden) && !errors.Is(err, errors.NotFound) { 515 return nil, errors.Trace(err) 516 } 517 logger.Warningf("update units %v", err) 518 } 519 520 if appUnitInfo != nil { 521 for _, unitInfo := range appUnitInfo.Units { 522 unit, err := names.ParseUnitTag(unitInfo.UnitTag) 523 if err != nil { 524 return nil, errors.Trace(err) 525 } 526 err = broker.AnnotateUnit(appName, caas.ModeSidecar, unitInfo.ProviderId, unit) 527 if errors.Is(err, errors.NotFound) { 528 continue 529 } else if err != nil { 530 return nil, errors.Trace(err) 531 } 532 } 533 } 534 return reportedStatus, nil 535 } 536 537 func refreshApplicationStatus(appName string, app caas.Application, appLife life.Value, 538 facade CAASProvisionerFacade, logger Logger) error { 539 if appLife != life.Alive { 540 return nil 541 } 542 st, err := app.State() 543 if errors.Is(err, errors.NotFound) { 544 // Do nothing. 545 return nil 546 } else if err != nil { 547 return errors.Trace(err) 548 } 549 550 // refresh the units information. 551 units, err := facade.Units(appName) 552 if errors.Is(err, errors.NotFound) { 553 return nil 554 } else if err != nil { 555 return errors.Trace(err) 556 } 557 readyUnitsCount := 0 558 for _, unit := range units { 559 if unit.UnitStatus.AgentStatus.Status == string(status.Active) { 560 readyUnitsCount++ 561 } 562 } 563 if st.DesiredReplicas > 0 && st.DesiredReplicas > readyUnitsCount { 564 // Only set status to waiting for scale up. 565 // When the application gets scaled down, the desired units will be kept running and 566 // the application should be active always. 
567 return setApplicationStatus(appName, status.Waiting, "waiting for units to settle down", nil, facade, logger) 568 } 569 return setApplicationStatus(appName, status.Active, "", nil, facade, logger) 570 } 571 572 func waitForTerminated(appName string, app caas.Application, 573 clk clock.Clock) error { 574 existsFunc := func() error { 575 appState, err := app.Exists() 576 if err != nil { 577 return errors.Trace(err) 578 } 579 if !appState.Exists { 580 return nil 581 } 582 if appState.Exists && !appState.Terminating { 583 return errors.Errorf("application %q should be terminating but is now running", appName) 584 } 585 return tryAgain 586 } 587 retryCallArgs := retry.CallArgs{ 588 Attempts: 60, 589 Delay: 3 * time.Second, 590 MaxDuration: 3 * time.Minute, 591 Clock: clk, 592 Func: existsFunc, 593 IsFatalError: func(err error) bool { 594 return !errors.Is(err, tryAgain) 595 }, 596 } 597 return errors.Trace(retry.Call(retryCallArgs)) 598 } 599 600 // reconcileDeadUnitScale is setup to respond to CAAS sidecard units that become 601 // dead. It takes stock of what the current desired scale is for the application 602 // and the number of dead units in the application. Once the number of dead units 603 // has reached the a point where the desired scale has been achieved this func 604 // can go ahead and removed the units from CAAS provider. 
605 func reconcileDeadUnitScale(appName string, app caas.Application, 606 facade CAASProvisionerFacade, logger Logger) error { 607 units, err := facade.Units(appName) 608 if err != nil { 609 return fmt.Errorf("getting units for application %s: %w", appName, err) 610 } 611 612 ps, err := facade.ProvisioningState(appName) 613 if err != nil { 614 return errors.Trace(err) 615 } 616 if ps == nil || !ps.Scaling { 617 return nil 618 } 619 620 desiredScale := ps.ScaleTarget 621 unitsToRemove := len(units) - desiredScale 622 623 var deadUnits []params.CAASUnit 624 for _, unit := range units { 625 unitLife, err := facade.Life(unit.Tag.Id()) 626 if err != nil { 627 return fmt.Errorf("getting life for unit %q: %w", unit.Tag, err) 628 } 629 if unitLife == life.Dead { 630 deadUnits = append(deadUnits, unit) 631 } 632 } 633 634 if unitsToRemove <= 0 { 635 unitsToRemove = len(deadUnits) 636 } 637 638 // We haven't met the threshold to initiate scale down in the CAAS provider 639 // yet. 640 if unitsToRemove != len(deadUnits) { 641 return nil 642 } 643 644 logger.Infof("scaling application %q to desired scale %d", appName, desiredScale) 645 if err := app.Scale(desiredScale); err != nil && !errors.Is(err, errors.NotFound) { 646 return fmt.Errorf( 647 "scaling application %q to scale %d: %w", 648 appName, 649 desiredScale, 650 err, 651 ) 652 } 653 654 appState, err := app.State() 655 if err != nil && !errors.Is(err, errors.NotFound) { 656 return err 657 } 658 // TODO: stop k8s things from mutating the statefulset. 
659 if len(appState.Replicas) > desiredScale { 660 return tryAgain 661 } 662 663 for _, deadUnit := range deadUnits { 664 logger.Infof("removing dead unit %s", deadUnit.Tag.Id()) 665 if err := facade.RemoveUnit(deadUnit.Tag.Id()); err != nil && !errors.Is(err, errors.NotFound) { 666 return fmt.Errorf("removing dead unit %q: %w", deadUnit.Tag.Id(), err) 667 } 668 } 669 670 return updateProvisioningState(appName, false, 0, facade) 671 } 672 673 // ensureScale determines how and when to scale up or down based on 674 // current scale targets that have yet to be met. 675 func ensureScale(appName string, app caas.Application, appLife life.Value, 676 facade CAASProvisionerFacade, unitFacade CAASUnitProvisionerFacade, logger Logger) error { 677 var err error 678 var desiredScale int 679 switch appLife { 680 case life.Alive: 681 desiredScale, err = unitFacade.ApplicationScale(appName) 682 if err != nil { 683 return errors.Annotatef(err, "fetching application %q desired scale", appName) 684 } 685 case life.Dying, life.Dead: 686 desiredScale = 0 687 default: 688 return errors.NotImplementedf("unknown life %q", appLife) 689 } 690 691 ps, err := facade.ProvisioningState(appName) 692 if err != nil { 693 return errors.Trace(err) 694 } 695 if ps == nil { 696 ps = ¶ms.CAASApplicationProvisioningState{} 697 } 698 699 logger.Debugf("updating application %q scale to %d", appName, desiredScale) 700 if !ps.Scaling || appLife != life.Alive { 701 err := updateProvisioningState(appName, true, desiredScale, facade) 702 if err != nil { 703 return err 704 } 705 ps.Scaling = true 706 ps.ScaleTarget = desiredScale 707 } 708 709 units, err := facade.Units(appName) 710 if err != nil { 711 return err 712 } 713 if ps.ScaleTarget >= len(units) { 714 logger.Infof("scaling application %q to desired scale %d", appName, ps.ScaleTarget) 715 err = app.Scale(ps.ScaleTarget) 716 if appLife != life.Alive && errors.Is(err, errors.NotFound) { 717 logger.Infof("dying application %q is already removed", appName) 
718 } else if err != nil { 719 return err 720 } 721 return updateProvisioningState(appName, false, 0, facade) 722 } 723 724 unitsToDestroy, err := app.UnitsToRemove(context.TODO(), ps.ScaleTarget) 725 if err != nil && errors.Is(err, errors.NotFound) { 726 return nil 727 } else if err != nil { 728 return fmt.Errorf("scaling application %q to desired scale %d: %w", 729 appName, ps.ScaleTarget, err) 730 } 731 732 if len(unitsToDestroy) > 0 { 733 if err := facade.DestroyUnits(unitsToDestroy); err != nil { 734 return errors.Trace(err) 735 } 736 } 737 738 if ps.ScaleTarget != desiredScale { 739 // if the current scale target doesn't equal the desired scale 740 // we need to rerun this. 741 logger.Debugf("application %q currently scaling to %d but desired scale is %d", appName, ps.ScaleTarget, desiredScale) 742 return tryAgain 743 } 744 745 return nil 746 } 747 748 func setApplicationStatus(appName string, s status.Status, reason string, data map[string]interface{}, 749 facade CAASProvisionerFacade, logger Logger) error { 750 logger.Tracef("updating application %q status to %q, %q, %v", appName, s, reason, data) 751 return facade.SetOperatorStatus(appName, s, reason, data) 752 } 753 754 func updateProvisioningState(appName string, scaling bool, scaleTarget int, 755 facade CAASProvisionerFacade) error { 756 newPs := params.CAASApplicationProvisioningState{ 757 Scaling: scaling, 758 ScaleTarget: scaleTarget, 759 } 760 err := facade.SetProvisioningState(appName, newPs) 761 if params.IsCodeTryAgain(err) { 762 return tryAgain 763 } else if err != nil { 764 return errors.Annotatef(err, "setting provisiong state for application %q", appName) 765 } 766 return nil 767 }