github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasunitprovisioner/application_worker.go (about) 1 // Copyright 2017 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package caasunitprovisioner 5 6 import ( 7 "reflect" 8 "strings" 9 10 "github.com/juju/charm/v12" 11 "github.com/juju/errors" 12 "github.com/juju/names/v5" 13 "github.com/juju/worker/v3" 14 "github.com/juju/worker/v3/catacomb" 15 16 "github.com/juju/juju/caas" 17 "github.com/juju/juju/core/status" 18 "github.com/juju/juju/core/watcher" 19 "github.com/juju/juju/rpc/params" 20 ) 21 22 type applicationWorker struct { 23 catacomb catacomb.Catacomb 24 application string 25 mode caas.DeploymentMode 26 serviceBroker ServiceBroker 27 containerBroker ContainerBroker 28 29 provisioningStatusSetter ProvisioningStatusSetter 30 provisioningInfoGetter ProvisioningInfoGetter 31 applicationGetter ApplicationGetter 32 applicationUpdater ApplicationUpdater 33 unitUpdater UnitUpdater 34 charmGetter CharmGetter 35 36 logger Logger 37 } 38 39 func newApplicationWorker( 40 application string, 41 mode caas.DeploymentMode, 42 serviceBroker ServiceBroker, 43 containerBroker ContainerBroker, 44 provisioningStatusSetter ProvisioningStatusSetter, 45 provisioningInfoGetter ProvisioningInfoGetter, 46 applicationGetter ApplicationGetter, 47 applicationUpdater ApplicationUpdater, 48 unitUpdater UnitUpdater, 49 charmGetter CharmGetter, 50 logger Logger, 51 ) (*applicationWorker, error) { 52 w := &applicationWorker{ 53 application: application, 54 mode: mode, 55 serviceBroker: serviceBroker, 56 containerBroker: containerBroker, 57 provisioningStatusSetter: provisioningStatusSetter, 58 provisioningInfoGetter: provisioningInfoGetter, 59 applicationGetter: applicationGetter, 60 applicationUpdater: applicationUpdater, 61 unitUpdater: unitUpdater, 62 charmGetter: charmGetter, 63 logger: logger, 64 } 65 if err := catacomb.Invoke(catacomb.Plan{ 66 Site: &w.catacomb, 67 Work: w.loop, 68 }); err != nil { 69 return nil, errors.Trace(err) 70 } 71 return w, nil 72 } 73 74 // Kill is part of the worker.Worker interface. 75 func (aw *applicationWorker) Kill() { 76 aw.catacomb.Kill(nil) 77 } 78 79 // Wait is part of the worker.Worker interface. 80 func (aw *applicationWorker) Wait() error { 81 return aw.catacomb.Wait() 82 } 83 84 func (aw *applicationWorker) loop() error { 85 if aw.mode == caas.ModeWorkload { 86 deploymentWorker, err := newDeploymentWorker( 87 aw.application, 88 aw.provisioningStatusSetter, 89 aw.serviceBroker, 90 aw.provisioningInfoGetter, 91 aw.applicationGetter, 92 aw.applicationUpdater, 93 aw.logger, 94 ) 95 if err != nil { 96 return errors.Trace(err) 97 } 98 _ = aw.catacomb.Add(deploymentWorker) 99 } 100 101 var ( 102 brokerUnitsWatcher watcher.NotifyWatcher 103 brokerUnitsChannel watcher.NotifyChannel 104 105 appOperatorWatcher watcher.NotifyWatcher 106 appOperatorChannel watcher.NotifyChannel 107 108 appDeploymentWatcher watcher.NotifyWatcher 109 appDeploymentChannel watcher.NotifyChannel 110 ) 111 112 appChangesWatcher, err := aw.applicationGetter.WatchApplication(aw.application) 113 if err != nil { 114 return errors.Trace(err) 115 } 116 117 // The caas watcher can just die from underneath hence it needs to be 118 // restarted all the time. So we don't abuse the catacomb by adding new 119 // workers unbounded, use a defer to stop the running worker. 120 defer func() { 121 if brokerUnitsWatcher != nil { 122 _ = worker.Stop(brokerUnitsWatcher) 123 } 124 if appOperatorWatcher != nil { 125 _ = worker.Stop(appOperatorWatcher) 126 } 127 if appDeploymentWatcher != nil { 128 _ = worker.Stop(appDeploymentWatcher) 129 } 130 _ = worker.Stop(appChangesWatcher) 131 }() 132 133 // Cache the last reported status information 134 // so we only report true changes. 135 lastReportedStatus := make(map[string]status.StatusInfo) 136 lastReportedScale := -1 137 initialOperatorEvent := true 138 logger := aw.logger 139 for { 140 var err error 141 // The caas watcher can just die from underneath so recreate if needed. 142 if brokerUnitsWatcher == nil { 143 brokerUnitsWatcher, err = aw.containerBroker.WatchUnits(aw.application, aw.mode) 144 if err != nil { 145 if strings.Contains(err.Error(), "unexpected EOF") { 146 logger.Warningf("k8s cloud hosting %q has disappeared", aw.application, aw.mode) 147 return nil 148 } 149 return errors.Annotatef(err, "failed to start unit watcher for %q", aw.application) 150 } 151 brokerUnitsChannel = brokerUnitsWatcher.Changes() 152 } 153 if appOperatorWatcher == nil && aw.mode == caas.ModeWorkload { 154 appOperatorWatcher, err = aw.containerBroker.WatchOperator(aw.application) 155 if err != nil { 156 if strings.Contains(err.Error(), "unexpected EOF") { 157 logger.Warningf("k8s cloud hosting %q has disappeared", aw.application) 158 return nil 159 } 160 return errors.Annotatef(err, "failed to start operator watcher for %q", aw.application) 161 } 162 appOperatorChannel = appOperatorWatcher.Changes() 163 } 164 if appDeploymentWatcher == nil { 165 appDeploymentWatcher, err = aw.serviceBroker.WatchService(aw.application, aw.mode) 166 if err != nil { 167 if strings.Contains(err.Error(), "unexpected EOF") { 168 logger.Warningf("k8s cloud hosting %q has disappeared", aw.application, aw.mode) 169 return nil 170 } 171 return errors.Annotatef(err, "failed to start deployment watcher for %q", aw.application) 172 } 173 appDeploymentChannel = appDeploymentWatcher.Changes() 174 } 175 176 select { 177 // We must handle any processing due to application being removed prior 178 // to shutdown so that we don't leave stuff running in the cloud. 179 case <-aw.catacomb.Dying(): 180 return aw.catacomb.ErrDying() 181 case _, ok := <-brokerUnitsChannel: 182 logger.Debugf("units changed: %#v", ok) 183 if !ok { 184 logger.Debugf("%v", brokerUnitsWatcher.Wait()) 185 _ = worker.Stop(brokerUnitsWatcher) 186 brokerUnitsWatcher = nil 187 continue 188 } 189 service, err := aw.serviceBroker.GetService(aw.application, aw.mode, false) 190 if err != nil && !errors.IsNotFound(err) { 191 return errors.Trace(err) 192 } 193 logger.Debugf("service for %v(%v): %+v", aw.application, aw.mode, service) 194 if err := aw.clusterChanged(service, lastReportedStatus, true); err != nil { 195 // TODO(caas): change the shouldSetScale to false here once appDeploymentWatcher can get all events from k8s. 196 return errors.Trace(err) 197 } 198 case _, ok := <-appDeploymentChannel: 199 logger.Debugf("deployment changed: %#v", ok) 200 if !ok { 201 logger.Debugf("%v", appDeploymentWatcher.Wait()) 202 _ = worker.Stop(appDeploymentWatcher) 203 appDeploymentWatcher = nil 204 continue 205 } 206 service, err := aw.serviceBroker.GetService(aw.application, aw.mode, false) 207 if err != nil && !errors.IsNotFound(err) { 208 return errors.Trace(err) 209 } 210 haveNewStatus := true 211 if service.Id != "" { 212 // update svc info (addresses etc.) cloudservices. 213 err = updateApplicationService( 214 names.NewApplicationTag(aw.application), service, aw.applicationUpdater, 215 ) 216 if errors.IsForbidden(err) { 217 // ignore errors raised from SetScale because disordered events could happen often. 218 logger.Warningf("%v", err) 219 } else if err != nil { 220 return errors.Trace(err) 221 } 222 lastStatus, ok := lastReportedStatus[service.Id] 223 lastReportedStatus[service.Id] = service.Status 224 if ok { 225 // If we've seen the same status value previously, 226 // report as unknown as this value is ignored. 227 if reflect.DeepEqual(lastStatus, service.Status) { 228 service.Status = status.StatusInfo{ 229 Status: status.Unknown, 230 } 231 haveNewStatus = false 232 } 233 } 234 } 235 if service != nil && service.Scale != nil { 236 if *service.Scale == lastReportedScale && !haveNewStatus { 237 continue 238 } 239 lastReportedScale = *service.Scale 240 } 241 if err := aw.clusterChanged(service, lastReportedStatus, true); err != nil { 242 return errors.Trace(err) 243 } 244 case _, ok := <-appOperatorChannel: 245 if !ok { 246 logger.Debugf("%v", appOperatorWatcher.Wait()) 247 _ = worker.Stop(appOperatorWatcher) 248 appOperatorWatcher = nil 249 continue 250 } 251 logger.Debugf("operator update for %v", aw.application) 252 operator, err := aw.containerBroker.Operator(aw.application) 253 if errors.IsNotFound(err) { 254 if initialOperatorEvent { 255 initialOperatorEvent = false 256 continue 257 } 258 logger.Debugf("pod not found for application %q", aw.application) 259 if err := aw.provisioningStatusSetter.SetOperatorStatus(aw.application, status.Terminated, "", nil); err != nil { 260 return errors.Trace(err) 261 } 262 } else if err != nil { 263 return errors.Trace(err) 264 } else { 265 if err := aw.provisioningStatusSetter.SetOperatorStatus(aw.application, operator.Status.Status, operator.Status.Message, operator.Status.Data); err != nil { 266 return errors.Trace(err) 267 } 268 } 269 case _, ok := <-appChangesWatcher.Changes(): 270 if !ok { 271 return errors.New("application watcher closed") 272 } 273 // If charm is (now) a v2 charm, exit the worker. 274 format, err := aw.charmFormat() 275 if errors.IsNotFound(err) { 276 aw.logger.Debugf("application %q no longer exists", aw.application) 277 return nil 278 } else if err != nil { 279 return errors.Trace(err) 280 } 281 if format >= charm.FormatV2 { 282 aw.logger.Debugf("application %q v1 worker got v2 charm event, stopping", aw.application) 283 return nil 284 } 285 } 286 } 287 } 288 289 func (aw *applicationWorker) clusterChanged( 290 service *caas.Service, 291 lastReportedStatus map[string]status.StatusInfo, 292 shouldSetScale bool, 293 ) error { 294 units, err := aw.containerBroker.Units(aw.application, aw.mode) 295 if err != nil { 296 return errors.Trace(err) 297 } 298 var scale *int 299 var generation *int64 300 if service != nil && shouldSetScale { 301 generation = service.Generation 302 scale = service.Scale 303 } 304 args := params.UpdateApplicationUnits{ 305 ApplicationTag: names.NewApplicationTag(aw.application).String(), 306 Scale: scale, 307 Generation: generation, 308 } 309 if service != nil { 310 args.Status = params.EntityStatus{ 311 Status: service.Status.Status, 312 Info: service.Status.Message, 313 Data: service.Status.Data, 314 } 315 } 316 for _, u := range units { 317 // For pods managed by the substrate, any marked as dying 318 // are treated as non-existing. 319 if u.Dying { 320 continue 321 } 322 unitStatus := u.Status 323 lastStatus, ok := lastReportedStatus[u.Id] 324 lastReportedStatus[u.Id] = unitStatus 325 if ok { 326 // If we've seen the same status value previously, 327 // report as unknown as this value is ignored. 328 if reflect.DeepEqual(lastStatus, unitStatus) { 329 unitStatus = status.StatusInfo{ 330 Status: status.Unknown, 331 } 332 } 333 } 334 335 unitParams := params.ApplicationUnitParams{ 336 ProviderId: u.Id, 337 Address: u.Address, 338 Ports: u.Ports, 339 Stateful: u.Stateful, 340 Status: unitStatus.Status.String(), 341 Info: unitStatus.Message, 342 Data: unitStatus.Data, 343 } 344 // Fill in any filesystem info for volumes attached to the unit. 345 // A unit will not become active until all required volumes are 346 // provisioned, so it makes sense to send this information along 347 // with the units to which they are attached. 348 for _, info := range u.FilesystemInfo { 349 unitParams.FilesystemInfo = append(unitParams.FilesystemInfo, params.KubernetesFilesystemInfo{ 350 StorageName: info.StorageName, 351 FilesystemId: info.FilesystemId, 352 Size: info.Size, 353 MountPoint: info.MountPoint, 354 ReadOnly: info.ReadOnly, 355 Status: info.Status.Status.String(), 356 Info: info.Status.Message, 357 Data: info.Status.Data, 358 Volume: params.KubernetesVolumeInfo{ 359 VolumeId: info.Volume.VolumeId, 360 Size: info.Volume.Size, 361 Persistent: info.Volume.Persistent, 362 Status: info.Volume.Status.Status.String(), 363 Info: info.Volume.Status.Message, 364 Data: info.Volume.Status.Data, 365 }, 366 }) 367 } 368 args.Units = append(args.Units, unitParams) 369 } 370 appUnitInfo, err := aw.unitUpdater.UpdateUnits(args) 371 if err != nil { 372 // We can ignore not found errors as the worker will get stopped anyway. 373 // We can also ignore Forbidden errors raised from SetScale because disordered events could happen often. 374 if !errors.IsForbidden(err) && !errors.IsNotFound(err) { 375 return errors.Trace(err) 376 } 377 aw.logger.Warningf("update units %v", err) 378 } 379 380 if appUnitInfo != nil { 381 for _, unitInfo := range appUnitInfo.Units { 382 unit, err := names.ParseUnitTag(unitInfo.UnitTag) 383 if err != nil { 384 return errors.Trace(err) 385 } 386 err = aw.containerBroker.AnnotateUnit(aw.application, aw.mode, unitInfo.ProviderId, unit) 387 if errors.IsNotFound(err) { 388 continue 389 } else if err != nil { 390 return errors.Trace(err) 391 } 392 } 393 } 394 return nil 395 } 396 397 func (aw *applicationWorker) charmFormat() (charm.Format, error) { 398 charmInfo, err := aw.charmGetter.ApplicationCharmInfo(aw.application) 399 if err != nil { 400 return charm.FormatUnknown, errors.Annotatef(err, "failed to get charm info for application %q", aw.application) 401 } 402 return charm.MetaFormat(charmInfo.Charm()), nil 403 }