github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/upgradeseries/worker.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package upgradeseries 5 6 import ( 7 "strings" 8 "sync" 9 10 "github.com/juju/errors" 11 "gopkg.in/juju/names.v2" 12 "gopkg.in/juju/worker.v1" 13 "gopkg.in/juju/worker.v1/catacomb" 14 15 "github.com/juju/juju/apiserver/params" 16 "github.com/juju/juju/core/model" 17 "github.com/juju/juju/service" 18 "github.com/juju/os/series" 19 ) 20 21 //go:generate mockgen -package mocks -destination mocks/package_mock.go github.com/juju/juju/worker/upgradeseries Facade,Logger,AgentService,ServiceAccess,Upgrader 22 23 var hostSeries = series.HostSeries 24 25 // Logger represents the methods required to emit log messages. 26 type Logger interface { 27 Debugf(message string, args ...interface{}) 28 Infof(message string, args ...interface{}) 29 Warningf(message string, args ...interface{}) 30 Errorf(message string, args ...interface{}) 31 } 32 33 // Config is the configuration needed to construct an UpgradeSeries worker. 34 type Config struct { 35 // FacadeFactory is used to acquire back-end state with 36 // the input tag context. 37 FacadeFactory func(names.Tag) Facade 38 39 // Logger is the logger for this worker. 40 Logger Logger 41 42 // Tag is the current machine tag. 43 Tag names.Tag 44 45 // ServiceAccess provides access to the local init system. 46 Service ServiceAccess 47 48 // UpgraderFactory is a factory method that will return an upgrader capable 49 // of handling service and agent binary manipulation for a 50 // runtime-determined target OS series. 51 UpgraderFactory func(string) (Upgrader, error) 52 } 53 54 // Validate validates the upgrade-series worker configuration. 55 func (config Config) Validate() error { 56 if config.Logger == nil { 57 return errors.NotValidf("nil Logger") 58 } 59 if config.Tag == nil { 60 return errors.NotValidf("nil machine tag") 61 } 62 k := config.Tag.Kind() 63 if k != names.MachineTagKind { 64 return errors.NotValidf("%q tag kind", k) 65 } 66 if config.FacadeFactory == nil { 67 return errors.NotValidf("nil FacadeFactory") 68 } 69 if config.Service == nil { 70 return errors.NotValidf("nil Service") 71 } 72 return nil 73 } 74 75 // upgradeSeriesWorker is responsible for machine and unit agent requirements 76 // during upgrade-series: 77 // copying the agent binary directory and renaming; 78 // rewriting the machine and unit(s) systemd files if necessary; 79 // ensuring unit agents are started post-upgrade; 80 // moving the status of the upgrade-series steps along. 81 type upgradeSeriesWorker struct { 82 Facade 83 84 facadeFactory func(names.Tag) Facade 85 catacomb catacomb.Catacomb 86 logger Logger 87 service ServiceAccess 88 upgraderFactory func(string) (Upgrader, error) 89 90 // Some local state retained for reporting purposes. 91 mu sync.Mutex 92 machineStatus model.UpgradeSeriesStatus 93 preparedUnits []names.UnitTag 94 completedUnits []names.UnitTag 95 96 // Ensure that leaders are pinned only once if possible, 97 // on the first transition to UpgradeSeriesPrepareStarted. 98 // However repeated pin calls are not of too much concern, 99 // as the pin operations are idempotent. 100 leadersPinned bool 101 } 102 103 // NewWorker creates, starts and returns a new upgrade-series worker based on 104 // the input configuration. 105 func NewWorker(config Config) (worker.Worker, error) { 106 if err := config.Validate(); err != nil { 107 return nil, errors.Trace(err) 108 } 109 110 w := &upgradeSeriesWorker{ 111 Facade: config.FacadeFactory(config.Tag), 112 facadeFactory: config.FacadeFactory, 113 logger: config.Logger, 114 service: config.Service, 115 upgraderFactory: config.UpgraderFactory, 116 machineStatus: model.UpgradeSeriesNotStarted, 117 leadersPinned: false, 118 } 119 120 if err := catacomb.Invoke(catacomb.Plan{ 121 Site: &w.catacomb, 122 Work: w.loop, 123 }); err != nil { 124 return nil, errors.Trace(err) 125 } 126 127 return w, nil 128 } 129 130 func (w *upgradeSeriesWorker) loop() error { 131 uw, err := w.WatchUpgradeSeriesNotifications() 132 if err != nil { 133 return errors.Trace(err) 134 } 135 err = w.catacomb.Add(uw) 136 if err != nil { 137 return errors.Trace(err) 138 } 139 for { 140 select { 141 case <-w.catacomb.Dying(): 142 return w.catacomb.ErrDying() 143 case <-uw.Changes(): 144 if err := w.handleUpgradeSeriesChange(); err != nil { 145 return errors.Trace(err) 146 } 147 } 148 } 149 } 150 151 // handleUpgradeSeriesChange retrieves the current upgrade-series status for 152 // this machine and based on the status, calls methods that will progress 153 // the workflow accordingly. 154 func (w *upgradeSeriesWorker) handleUpgradeSeriesChange() error { 155 w.mu.Lock() 156 defer w.mu.Unlock() 157 158 var err error 159 if w.machineStatus, err = w.MachineStatus(); err != nil { 160 if errors.IsNotFound(err) { 161 // No upgrade-series lock. This can happen when: 162 // - The first watch call is made. 163 // - The lock is removed after a completed upgrade. 164 w.logger.Infof("no series upgrade lock present") 165 w.machineStatus = model.UpgradeSeriesNotStarted 166 w.preparedUnits = nil 167 w.completedUnits = nil 168 return nil 169 } 170 return errors.Trace(err) 171 } 172 w.logger.Infof("machine series upgrade status is %q", w.machineStatus) 173 174 switch w.machineStatus { 175 case model.UpgradeSeriesPrepareStarted: 176 err = w.handlePrepareStarted() 177 case model.UpgradeSeriesCompleteStarted: 178 err = w.handleCompleteStarted() 179 case model.UpgradeSeriesCompleted: 180 err = w.handleCompleted() 181 } 182 return errors.Trace(err) 183 } 184 185 // handlePrepareStarted handles workflow for the machine with an upgrade-series 186 // lock status of "UpgradeSeriesPrepareStarted" 187 func (w *upgradeSeriesWorker) handlePrepareStarted() error { 188 var err error 189 if !w.leadersPinned { 190 if err = w.pinLeaders(); err != nil { 191 return errors.Trace(err) 192 } 193 } 194 195 if w.preparedUnits, err = w.UnitsPrepared(); err != nil { 196 return errors.Trace(err) 197 } 198 199 unitServices, allConfirmed, err := w.compareUnitAgentServices(w.preparedUnits) 200 if err != nil { 201 return errors.Trace(err) 202 } 203 if !allConfirmed { 204 w.logger.Debugf( 205 "waiting for units to complete series upgrade preparation; known unit agent services: %s", 206 unitNames(unitServices), 207 ) 208 return nil 209 } 210 211 return errors.Trace(w.transitionPrepareComplete(unitServices)) 212 } 213 214 // transitionPrepareComplete rewrites service unit files for unit agents running 215 // on this machine so that they are compatible with the init system of the 216 // series upgrade target. 217 func (w *upgradeSeriesWorker) transitionPrepareComplete(unitServices map[string]string) error { 218 w.logger.Infof("preparing service units for series upgrade") 219 toSeries, err := w.TargetSeries() 220 if err != nil { 221 return errors.Trace(err) 222 } 223 upgrader, err := w.upgraderFactory(toSeries) 224 if err != nil { 225 return errors.Trace(err) 226 } 227 if err := upgrader.PerformUpgrade(); err != nil { 228 return errors.Trace(err) 229 } 230 return errors.Trace(w.SetMachineStatus(model.UpgradeSeriesPrepareCompleted, 231 "binaries and service files written")) 232 } 233 234 func (w *upgradeSeriesWorker) handleCompleteStarted() error { 235 var err error 236 if w.preparedUnits, err = w.UnitsPrepared(); err != nil { 237 return errors.Trace(err) 238 } 239 240 // If the units are still all in the "PrepareComplete" state, then the 241 // manual tasks have been run and an operator has executed the 242 // upgrade-series completion command; start all the unit agents, 243 // and progress the workflow. 244 unitServices, allConfirmed, err := w.compareUnitAgentServices(w.preparedUnits) 245 if err != nil { 246 return errors.Trace(err) 247 } 248 servicesPresent := len(unitServices) > 0 249 250 // allConfirmed returns true when there are no units, so we only need this 251 // transition when there are services to start. 252 // If there are none, just proceed to the completed stage. 253 if allConfirmed && servicesPresent { 254 return errors.Trace(w.transitionUnitsStarted(unitServices)) 255 } 256 257 // If the units have all completed their workflow, then we are done. 258 // Make the final update to the lock to say the machine is completed. 259 if w.completedUnits, err = w.UnitsCompleted(); err != nil { 260 return errors.Trace(err) 261 } 262 263 unitServices, allConfirmed, err = w.compareUnitAgentServices(w.completedUnits) 264 if err != nil { 265 return errors.Trace(err) 266 } 267 268 if allConfirmed { 269 w.logger.Infof("series upgrade complete") 270 return errors.Trace(w.SetMachineStatus(model.UpgradeSeriesCompleted, "series upgrade complete")) 271 } 272 273 return nil 274 } 275 276 // transitionUnitsStarted iterates over units managed by this machine. Starts 277 // the unit's agent service, and transitions all unit subordinate statuses. 278 func (w *upgradeSeriesWorker) transitionUnitsStarted(unitServices map[string]string) error { 279 w.logger.Infof("ensuring units are up after series upgrade") 280 281 for unit, serviceName := range unitServices { 282 svc, err := w.service.DiscoverService(serviceName) 283 if err != nil { 284 return errors.Trace(err) 285 } 286 running, err := svc.Running() 287 if err != nil { 288 return errors.Trace(err) 289 } 290 if running { 291 continue 292 } 293 if err := svc.Start(); err != nil { 294 return errors.Annotatef(err, "starting %q unit agent after series upgrade", unit) 295 } 296 } 297 298 return errors.Trace(w.StartUnitCompletion("started unit agents after series upgrade")) 299 } 300 301 // handleCompleted notifies the server that it has completed the upgrade 302 // workflow, then unpins leadership for applications running on the machine. 303 func (w *upgradeSeriesWorker) handleCompleted() error { 304 s, err := hostSeries() 305 if err != nil { 306 return errors.Trace(err) 307 } 308 if err = w.FinishUpgradeSeries(s); err != nil { 309 return errors.Trace(err) 310 } 311 return errors.Trace(w.unpinLeaders()) 312 } 313 314 // compareUnitsAgentServices filters the services running on the local machine 315 // to those that are for unit agents. 316 // The service names keyed by unit names are returned, along with a boolean 317 // indicating whether all the input unit tags are represented in the 318 // service map. 319 // NOTE: No unit tags and no agent services returns true, meaning that the 320 // workflow can progress. 321 func (w *upgradeSeriesWorker) compareUnitAgentServices(units []names.UnitTag) (map[string]string, bool, error) { 322 unitServices, err := w.unitServices() 323 if err != nil { 324 return nil, false, errors.Trace(err) 325 } 326 if len(unitServices) == 0 { 327 w.logger.Debugf("no unit agent services found") 328 } 329 if len(units) != len(unitServices) { 330 return unitServices, false, nil 331 } 332 333 for _, u := range units { 334 if _, ok := unitServices[u.Id()]; !ok { 335 return unitServices, false, nil 336 } 337 } 338 return unitServices, true, nil 339 } 340 341 // pinLeaders pins leadership for applications 342 // represented by units running on this machine. 343 func (w *upgradeSeriesWorker) pinLeaders() (err error) { 344 // if we encounter an error, 345 // attempt to ensure that no application leaders remain pinned. 346 defer func() { 347 if err != nil { 348 if unpinErr := w.unpinLeaders(); unpinErr != nil { 349 err = errors.Wrap(err, unpinErr) 350 } 351 } 352 }() 353 354 results, err := w.PinMachineApplications() 355 if err != nil { 356 // If pin machine applications method return not implemented because it's 357 // utilising the legacy leases store, then we should display the warning 358 // in the log and return out. Unpinning leaders should be safe as that 359 // should be considered a no-op 360 if params.IsCodeNotImplemented(err) { 361 w.logger.Infof("failed to pin machine applications, with legacy lease manager leadership pinning is not implemented") 362 return nil 363 } 364 return errors.Trace(err) 365 } 366 367 var lastErr error 368 for app, err := range results { 369 if err == nil { 370 w.logger.Infof("unpin leader for application %q", app) 371 continue 372 } 373 w.logger.Errorf("failed to pin leader for application %q: %s", app, err.Error()) 374 lastErr = err 375 } 376 377 if lastErr == nil { 378 w.leadersPinned = true 379 return nil 380 } 381 return errors.Trace(lastErr) 382 } 383 384 // unpinLeaders unpins leadership for applications 385 // represented by units running on this machine. 386 func (w *upgradeSeriesWorker) unpinLeaders() error { 387 results, err := w.UnpinMachineApplications() 388 if err != nil { 389 return errors.Trace(err) 390 } 391 392 var lastErr error 393 for app, err := range results { 394 if err == nil { 395 w.logger.Infof("unpinned leader for application %q", app) 396 continue 397 } 398 w.logger.Errorf("failed to unpin leader for application %q: %s", app, err.Error()) 399 lastErr = err 400 } 401 402 if lastErr == nil { 403 w.leadersPinned = false 404 return nil 405 } 406 return errors.Trace(lastErr) 407 } 408 409 // Unit services returns a map of unit agent service names, 410 // keyed on their unit IDs. 411 func (w *upgradeSeriesWorker) unitServices() (map[string]string, error) { 412 services, err := w.service.ListServices() 413 if err != nil { 414 return nil, errors.Trace(err) 415 } 416 return service.FindUnitServiceNames(services), nil 417 } 418 419 // Report (worker.Reporter) generates a report for the Juju engine. 420 func (w *upgradeSeriesWorker) Report() map[string]interface{} { 421 w.mu.Lock() 422 defer w.mu.Unlock() 423 424 report := map[string]interface{}{"machine status": w.machineStatus} 425 426 if len(w.preparedUnits) > 0 { 427 units := make([]string, len(w.preparedUnits)) 428 for i, u := range w.preparedUnits { 429 units[i] = u.Id() 430 } 431 report["prepared units"] = units 432 } 433 434 if len(w.completedUnits) > 0 { 435 units := make([]string, len(w.completedUnits)) 436 for i, u := range w.completedUnits { 437 units[i] = u.Id() 438 } 439 report["completed units"] = units 440 } 441 442 return report 443 } 444 445 // Kill implements worker.Worker.Kill. 446 func (w *upgradeSeriesWorker) Kill() { 447 w.catacomb.Kill(nil) 448 } 449 450 // Wait implements worker.Worker.Wait. 451 func (w *upgradeSeriesWorker) Wait() error { 452 return w.catacomb.Wait() 453 } 454 455 // Stop stops the upgrade-series worker and returns any 456 // error it encountered when running. 457 func (w *upgradeSeriesWorker) Stop() error { 458 w.Kill() 459 return w.Wait() 460 } 461 462 // unitNames returns a comma-delimited string of unit names based on the input 463 // map of unit agent services. 464 func unitNames(units map[string]string) string { 465 unitIds := make([]string, len(units)) 466 i := 0 467 for u := range units { 468 unitIds[i] = u 469 i++ 470 } 471 return strings.Join(unitIds, ", ") 472 }