github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasunitprovisioner/application_worker.go (about)

     1  // Copyright 2017 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package caasunitprovisioner
     5  
     6  import (
     7  	"reflect"
     8  	"strings"
     9  
    10  	"github.com/juju/charm/v12"
    11  	"github.com/juju/errors"
    12  	"github.com/juju/names/v5"
    13  	"github.com/juju/worker/v3"
    14  	"github.com/juju/worker/v3/catacomb"
    15  
    16  	"github.com/juju/juju/caas"
    17  	"github.com/juju/juju/core/status"
    18  	"github.com/juju/juju/core/watcher"
    19  	"github.com/juju/juju/rpc/params"
    20  )
    21  
// applicationWorker watches a single CAAS application and relays changes
// seen in the cluster (units, service/deployment and, in workload mode, the
// operator) through the updater and status-setter facades it holds.
type applicationWorker struct {
	// catacomb owns the worker's lifecycle; Kill and Wait delegate to it.
	catacomb catacomb.Catacomb
	// application is the name of the application this worker looks after.
	application string
	// mode is the deployment mode of the application; some watchers and the
	// deployment worker are only started when it is caas.ModeWorkload.
	mode caas.DeploymentMode
	// serviceBroker is used to watch and fetch the application's service.
	serviceBroker ServiceBroker
	// containerBroker is used to watch, list and annotate the application's
	// units and to watch and fetch its operator.
	containerBroker ContainerBroker

	// provisioningStatusSetter records operator status changes and is also
	// handed to the deployment worker.
	provisioningStatusSetter ProvisioningStatusSetter
	// provisioningInfoGetter is only passed through to the deployment worker.
	provisioningInfoGetter ProvisioningInfoGetter
	// applicationGetter supplies the application watcher and is also handed
	// to the deployment worker.
	applicationGetter ApplicationGetter
	// applicationUpdater receives service updates and is also handed to the
	// deployment worker.
	applicationUpdater ApplicationUpdater
	// unitUpdater receives the unit information gathered from the cluster.
	unitUpdater UnitUpdater
	// charmGetter is used to look up the application's charm so its format
	// can be checked.
	charmGetter CharmGetter

	logger Logger
}
    38  
    39  func newApplicationWorker(
    40  	application string,
    41  	mode caas.DeploymentMode,
    42  	serviceBroker ServiceBroker,
    43  	containerBroker ContainerBroker,
    44  	provisioningStatusSetter ProvisioningStatusSetter,
    45  	provisioningInfoGetter ProvisioningInfoGetter,
    46  	applicationGetter ApplicationGetter,
    47  	applicationUpdater ApplicationUpdater,
    48  	unitUpdater UnitUpdater,
    49  	charmGetter CharmGetter,
    50  	logger Logger,
    51  ) (*applicationWorker, error) {
    52  	w := &applicationWorker{
    53  		application:              application,
    54  		mode:                     mode,
    55  		serviceBroker:            serviceBroker,
    56  		containerBroker:          containerBroker,
    57  		provisioningStatusSetter: provisioningStatusSetter,
    58  		provisioningInfoGetter:   provisioningInfoGetter,
    59  		applicationGetter:        applicationGetter,
    60  		applicationUpdater:       applicationUpdater,
    61  		unitUpdater:              unitUpdater,
    62  		charmGetter:              charmGetter,
    63  		logger:                   logger,
    64  	}
    65  	if err := catacomb.Invoke(catacomb.Plan{
    66  		Site: &w.catacomb,
    67  		Work: w.loop,
    68  	}); err != nil {
    69  		return nil, errors.Trace(err)
    70  	}
    71  	return w, nil
    72  }
    73  
// Kill is part of the worker.Worker interface.
// It kills the worker's catacomb with a nil error.
func (aw *applicationWorker) Kill() {
	aw.catacomb.Kill(nil)
}
    78  
// Wait is part of the worker.Worker interface.
// It returns whatever the worker's catacomb reports from its own Wait.
func (aw *applicationWorker) Wait() error {
	return aw.catacomb.Wait()
}
    83  
    84  func (aw *applicationWorker) loop() error {
    85  	if aw.mode == caas.ModeWorkload {
    86  		deploymentWorker, err := newDeploymentWorker(
    87  			aw.application,
    88  			aw.provisioningStatusSetter,
    89  			aw.serviceBroker,
    90  			aw.provisioningInfoGetter,
    91  			aw.applicationGetter,
    92  			aw.applicationUpdater,
    93  			aw.logger,
    94  		)
    95  		if err != nil {
    96  			return errors.Trace(err)
    97  		}
    98  		_ = aw.catacomb.Add(deploymentWorker)
    99  	}
   100  
   101  	var (
   102  		brokerUnitsWatcher watcher.NotifyWatcher
   103  		brokerUnitsChannel watcher.NotifyChannel
   104  
   105  		appOperatorWatcher watcher.NotifyWatcher
   106  		appOperatorChannel watcher.NotifyChannel
   107  
   108  		appDeploymentWatcher watcher.NotifyWatcher
   109  		appDeploymentChannel watcher.NotifyChannel
   110  	)
   111  
   112  	appChangesWatcher, err := aw.applicationGetter.WatchApplication(aw.application)
   113  	if err != nil {
   114  		return errors.Trace(err)
   115  	}
   116  
   117  	// The caas watcher can just die from underneath hence it needs to be
   118  	// restarted all the time. So we don't abuse the catacomb by adding new
   119  	// workers unbounded, use a defer to stop the running worker.
   120  	defer func() {
   121  		if brokerUnitsWatcher != nil {
   122  			_ = worker.Stop(brokerUnitsWatcher)
   123  		}
   124  		if appOperatorWatcher != nil {
   125  			_ = worker.Stop(appOperatorWatcher)
   126  		}
   127  		if appDeploymentWatcher != nil {
   128  			_ = worker.Stop(appDeploymentWatcher)
   129  		}
   130  		_ = worker.Stop(appChangesWatcher)
   131  	}()
   132  
   133  	// Cache the last reported status information
   134  	// so we only report true changes.
   135  	lastReportedStatus := make(map[string]status.StatusInfo)
   136  	lastReportedScale := -1
   137  	initialOperatorEvent := true
   138  	logger := aw.logger
   139  	for {
   140  		var err error
   141  		// The caas watcher can just die from underneath so recreate if needed.
   142  		if brokerUnitsWatcher == nil {
   143  			brokerUnitsWatcher, err = aw.containerBroker.WatchUnits(aw.application, aw.mode)
   144  			if err != nil {
   145  				if strings.Contains(err.Error(), "unexpected EOF") {
   146  					logger.Warningf("k8s cloud hosting %q has disappeared", aw.application, aw.mode)
   147  					return nil
   148  				}
   149  				return errors.Annotatef(err, "failed to start unit watcher for %q", aw.application)
   150  			}
   151  			brokerUnitsChannel = brokerUnitsWatcher.Changes()
   152  		}
   153  		if appOperatorWatcher == nil && aw.mode == caas.ModeWorkload {
   154  			appOperatorWatcher, err = aw.containerBroker.WatchOperator(aw.application)
   155  			if err != nil {
   156  				if strings.Contains(err.Error(), "unexpected EOF") {
   157  					logger.Warningf("k8s cloud hosting %q has disappeared", aw.application)
   158  					return nil
   159  				}
   160  				return errors.Annotatef(err, "failed to start operator watcher for %q", aw.application)
   161  			}
   162  			appOperatorChannel = appOperatorWatcher.Changes()
   163  		}
   164  		if appDeploymentWatcher == nil {
   165  			appDeploymentWatcher, err = aw.serviceBroker.WatchService(aw.application, aw.mode)
   166  			if err != nil {
   167  				if strings.Contains(err.Error(), "unexpected EOF") {
   168  					logger.Warningf("k8s cloud hosting %q has disappeared", aw.application, aw.mode)
   169  					return nil
   170  				}
   171  				return errors.Annotatef(err, "failed to start deployment watcher for %q", aw.application)
   172  			}
   173  			appDeploymentChannel = appDeploymentWatcher.Changes()
   174  		}
   175  
   176  		select {
   177  		// We must handle any processing due to application being removed prior
   178  		// to shutdown so that we don't leave stuff running in the cloud.
   179  		case <-aw.catacomb.Dying():
   180  			return aw.catacomb.ErrDying()
   181  		case _, ok := <-brokerUnitsChannel:
   182  			logger.Debugf("units changed: %#v", ok)
   183  			if !ok {
   184  				logger.Debugf("%v", brokerUnitsWatcher.Wait())
   185  				_ = worker.Stop(brokerUnitsWatcher)
   186  				brokerUnitsWatcher = nil
   187  				continue
   188  			}
   189  			service, err := aw.serviceBroker.GetService(aw.application, aw.mode, false)
   190  			if err != nil && !errors.IsNotFound(err) {
   191  				return errors.Trace(err)
   192  			}
   193  			logger.Debugf("service for %v(%v): %+v", aw.application, aw.mode, service)
   194  			if err := aw.clusterChanged(service, lastReportedStatus, true); err != nil {
   195  				// TODO(caas): change the shouldSetScale to false here once appDeploymentWatcher can get all events from k8s.
   196  				return errors.Trace(err)
   197  			}
   198  		case _, ok := <-appDeploymentChannel:
   199  			logger.Debugf("deployment changed: %#v", ok)
   200  			if !ok {
   201  				logger.Debugf("%v", appDeploymentWatcher.Wait())
   202  				_ = worker.Stop(appDeploymentWatcher)
   203  				appDeploymentWatcher = nil
   204  				continue
   205  			}
   206  			service, err := aw.serviceBroker.GetService(aw.application, aw.mode, false)
   207  			if err != nil && !errors.IsNotFound(err) {
   208  				return errors.Trace(err)
   209  			}
   210  			haveNewStatus := true
   211  			if service.Id != "" {
   212  				// update svc info (addresses etc.) cloudservices.
   213  				err = updateApplicationService(
   214  					names.NewApplicationTag(aw.application), service, aw.applicationUpdater,
   215  				)
   216  				if errors.IsForbidden(err) {
   217  					// ignore errors raised from SetScale because disordered events could happen often.
   218  					logger.Warningf("%v", err)
   219  				} else if err != nil {
   220  					return errors.Trace(err)
   221  				}
   222  				lastStatus, ok := lastReportedStatus[service.Id]
   223  				lastReportedStatus[service.Id] = service.Status
   224  				if ok {
   225  					// If we've seen the same status value previously,
   226  					// report as unknown as this value is ignored.
   227  					if reflect.DeepEqual(lastStatus, service.Status) {
   228  						service.Status = status.StatusInfo{
   229  							Status: status.Unknown,
   230  						}
   231  						haveNewStatus = false
   232  					}
   233  				}
   234  			}
   235  			if service != nil && service.Scale != nil {
   236  				if *service.Scale == lastReportedScale && !haveNewStatus {
   237  					continue
   238  				}
   239  				lastReportedScale = *service.Scale
   240  			}
   241  			if err := aw.clusterChanged(service, lastReportedStatus, true); err != nil {
   242  				return errors.Trace(err)
   243  			}
   244  		case _, ok := <-appOperatorChannel:
   245  			if !ok {
   246  				logger.Debugf("%v", appOperatorWatcher.Wait())
   247  				_ = worker.Stop(appOperatorWatcher)
   248  				appOperatorWatcher = nil
   249  				continue
   250  			}
   251  			logger.Debugf("operator update for %v", aw.application)
   252  			operator, err := aw.containerBroker.Operator(aw.application)
   253  			if errors.IsNotFound(err) {
   254  				if initialOperatorEvent {
   255  					initialOperatorEvent = false
   256  					continue
   257  				}
   258  				logger.Debugf("pod not found for application %q", aw.application)
   259  				if err := aw.provisioningStatusSetter.SetOperatorStatus(aw.application, status.Terminated, "", nil); err != nil {
   260  					return errors.Trace(err)
   261  				}
   262  			} else if err != nil {
   263  				return errors.Trace(err)
   264  			} else {
   265  				if err := aw.provisioningStatusSetter.SetOperatorStatus(aw.application, operator.Status.Status, operator.Status.Message, operator.Status.Data); err != nil {
   266  					return errors.Trace(err)
   267  				}
   268  			}
   269  		case _, ok := <-appChangesWatcher.Changes():
   270  			if !ok {
   271  				return errors.New("application watcher closed")
   272  			}
   273  			// If charm is (now) a v2 charm, exit the worker.
   274  			format, err := aw.charmFormat()
   275  			if errors.IsNotFound(err) {
   276  				aw.logger.Debugf("application %q no longer exists", aw.application)
   277  				return nil
   278  			} else if err != nil {
   279  				return errors.Trace(err)
   280  			}
   281  			if format >= charm.FormatV2 {
   282  				aw.logger.Debugf("application %q v1 worker got v2 charm event, stopping", aw.application)
   283  				return nil
   284  			}
   285  		}
   286  	}
   287  }
   288  
   289  func (aw *applicationWorker) clusterChanged(
   290  	service *caas.Service,
   291  	lastReportedStatus map[string]status.StatusInfo,
   292  	shouldSetScale bool,
   293  ) error {
   294  	units, err := aw.containerBroker.Units(aw.application, aw.mode)
   295  	if err != nil {
   296  		return errors.Trace(err)
   297  	}
   298  	var scale *int
   299  	var generation *int64
   300  	if service != nil && shouldSetScale {
   301  		generation = service.Generation
   302  		scale = service.Scale
   303  	}
   304  	args := params.UpdateApplicationUnits{
   305  		ApplicationTag: names.NewApplicationTag(aw.application).String(),
   306  		Scale:          scale,
   307  		Generation:     generation,
   308  	}
   309  	if service != nil {
   310  		args.Status = params.EntityStatus{
   311  			Status: service.Status.Status,
   312  			Info:   service.Status.Message,
   313  			Data:   service.Status.Data,
   314  		}
   315  	}
   316  	for _, u := range units {
   317  		// For pods managed by the substrate, any marked as dying
   318  		// are treated as non-existing.
   319  		if u.Dying {
   320  			continue
   321  		}
   322  		unitStatus := u.Status
   323  		lastStatus, ok := lastReportedStatus[u.Id]
   324  		lastReportedStatus[u.Id] = unitStatus
   325  		if ok {
   326  			// If we've seen the same status value previously,
   327  			// report as unknown as this value is ignored.
   328  			if reflect.DeepEqual(lastStatus, unitStatus) {
   329  				unitStatus = status.StatusInfo{
   330  					Status: status.Unknown,
   331  				}
   332  			}
   333  		}
   334  
   335  		unitParams := params.ApplicationUnitParams{
   336  			ProviderId: u.Id,
   337  			Address:    u.Address,
   338  			Ports:      u.Ports,
   339  			Stateful:   u.Stateful,
   340  			Status:     unitStatus.Status.String(),
   341  			Info:       unitStatus.Message,
   342  			Data:       unitStatus.Data,
   343  		}
   344  		// Fill in any filesystem info for volumes attached to the unit.
   345  		// A unit will not become active until all required volumes are
   346  		// provisioned, so it makes sense to send this information along
   347  		// with the units to which they are attached.
   348  		for _, info := range u.FilesystemInfo {
   349  			unitParams.FilesystemInfo = append(unitParams.FilesystemInfo, params.KubernetesFilesystemInfo{
   350  				StorageName:  info.StorageName,
   351  				FilesystemId: info.FilesystemId,
   352  				Size:         info.Size,
   353  				MountPoint:   info.MountPoint,
   354  				ReadOnly:     info.ReadOnly,
   355  				Status:       info.Status.Status.String(),
   356  				Info:         info.Status.Message,
   357  				Data:         info.Status.Data,
   358  				Volume: params.KubernetesVolumeInfo{
   359  					VolumeId:   info.Volume.VolumeId,
   360  					Size:       info.Volume.Size,
   361  					Persistent: info.Volume.Persistent,
   362  					Status:     info.Volume.Status.Status.String(),
   363  					Info:       info.Volume.Status.Message,
   364  					Data:       info.Volume.Status.Data,
   365  				},
   366  			})
   367  		}
   368  		args.Units = append(args.Units, unitParams)
   369  	}
   370  	appUnitInfo, err := aw.unitUpdater.UpdateUnits(args)
   371  	if err != nil {
   372  		// We can ignore not found errors as the worker will get stopped anyway.
   373  		// We can also ignore Forbidden errors raised from SetScale because disordered events could happen often.
   374  		if !errors.IsForbidden(err) && !errors.IsNotFound(err) {
   375  			return errors.Trace(err)
   376  		}
   377  		aw.logger.Warningf("update units %v", err)
   378  	}
   379  
   380  	if appUnitInfo != nil {
   381  		for _, unitInfo := range appUnitInfo.Units {
   382  			unit, err := names.ParseUnitTag(unitInfo.UnitTag)
   383  			if err != nil {
   384  				return errors.Trace(err)
   385  			}
   386  			err = aw.containerBroker.AnnotateUnit(aw.application, aw.mode, unitInfo.ProviderId, unit)
   387  			if errors.IsNotFound(err) {
   388  				continue
   389  			} else if err != nil {
   390  				return errors.Trace(err)
   391  			}
   392  		}
   393  	}
   394  	return nil
   395  }
   396  
   397  func (aw *applicationWorker) charmFormat() (charm.Format, error) {
   398  	charmInfo, err := aw.charmGetter.ApplicationCharmInfo(aw.application)
   399  	if err != nil {
   400  		return charm.FormatUnknown, errors.Annotatef(err, "failed to get charm info for application %q", aw.application)
   401  	}
   402  	return charm.MetaFormat(charmInfo.Charm()), nil
   403  }