github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/caasoperator/caasoperator.go (about) 1 // Copyright 2017 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package caasoperator 5 6 import ( 7 "fmt" 8 "os" 9 "path/filepath" 10 "sync" 11 "time" 12 13 "github.com/juju/clock" 14 "github.com/juju/errors" 15 "github.com/juju/loggo" 16 "github.com/juju/os/series" 17 "github.com/juju/utils/arch" 18 "github.com/juju/utils/symlink" 19 "github.com/juju/version" 20 "gopkg.in/juju/names.v2" 21 "gopkg.in/juju/worker.v1" 22 "gopkg.in/juju/worker.v1/catacomb" 23 24 apiuniter "github.com/juju/juju/api/uniter" 25 "github.com/juju/juju/core/leadership" 26 "github.com/juju/juju/core/life" 27 "github.com/juju/juju/core/status" 28 jujunames "github.com/juju/juju/juju/names" 29 jujuversion "github.com/juju/juju/version" 30 jworker "github.com/juju/juju/worker" 31 "github.com/juju/juju/worker/caasoperator/remotestate" 32 "github.com/juju/juju/worker/uniter" 33 jujucharm "github.com/juju/juju/worker/uniter/charm" 34 ) 35 36 var logger = loggo.GetLogger("juju.worker.caasoperator") 37 38 // caasOperator implements the capabilities of the caasoperator agent. It is not intended to 39 // implement the actual *behaviour* of the caasoperator agent; that responsibility is 40 // delegated to Mode values, which are expected to react to events and direct 41 // the caasoperator's responses to them. 42 type caasOperator struct { 43 catacomb catacomb.Catacomb 44 config Config 45 paths Paths 46 runner *worker.Runner 47 deployer jujucharm.Deployer 48 stateFile *StateFile 49 } 50 51 // Config hold the configuration for a caasoperator worker. 52 type Config struct { 53 // ModelUUID is the UUID of the model. 54 ModelUUID string 55 56 // ModelName is the name of the model. 57 ModelName string 58 59 // Application holds the name of the application that 60 // this CAAS operator manages. 61 Application string 62 63 // CharmGetter is an interface used for getting the 64 // application's charm URL and SHA256 hash. 65 CharmGetter CharmGetter 66 67 // Clock holds the clock to be used by the CAAS operator 68 // for time-related operations. 69 Clock clock.Clock 70 71 // PodSpecSetter provides an interface for 72 // setting the pod spec for the application. 73 PodSpecSetter PodSpecSetter 74 75 // DataDir holds the path to the Juju "data directory", 76 // i.e. "/var/lib/juju" (by default). The CAAS operator 77 // expects to find the jujud binary at <data-dir>/tools/jujud. 78 DataDir string 79 80 // Downloader is an interface used for downloading the 81 // application charm. 82 Downloader Downloader 83 84 // StatusSetter is an interface used for setting the 85 // application status. 86 StatusSetter StatusSetter 87 88 // UnitGetter is an interface for getting a unit. 89 UnitGetter UnitGetter 90 91 // UnitRemover is an interface for removing a unit. 92 UnitRemover UnitRemover 93 94 // ApplicationWatcher is an interface for getting info about an application's charm. 95 ApplicationWatcher ApplicationWatcher 96 97 // VersionSetter is an interface for setting the operator agent version. 98 VersionSetter VersionSetter 99 100 // LeadershipTrackerFunc is a function for getting a leadership tracker. 101 LeadershipTrackerFunc func(unitTag names.UnitTag) leadership.Tracker 102 103 // UniterFacadeFunc is a function for making a uniter facade. 104 UniterFacadeFunc func(unitTag names.UnitTag) *apiuniter.State 105 106 // UniterParams are parameters used to construct a uniter worker. 107 UniterParams *uniter.UniterParams 108 109 // StartUniterFunc starts a uniter worker using the given runner. 110 StartUniterFunc func(runner *worker.Runner, params *uniter.UniterParams) error 111 } 112 113 func (config Config) Validate() error { 114 if !names.IsValidApplication(config.Application) { 115 return errors.NotValidf("application name %q", config.Application) 116 } 117 if config.CharmGetter == nil { 118 return errors.NotValidf("missing CharmGetter") 119 } 120 if config.ApplicationWatcher == nil { 121 return errors.NotValidf("missing ApplicationWatcher") 122 } 123 if config.UnitGetter == nil { 124 return errors.NotValidf("missing UnitGetter") 125 } 126 if config.UnitRemover == nil { 127 return errors.NotValidf("missing UnitRemover") 128 } 129 if config.LeadershipTrackerFunc == nil { 130 return errors.NotValidf("missing LeadershipTrackerFunc") 131 } 132 if config.UniterFacadeFunc == nil { 133 return errors.NotValidf("missing UniterFacadeFunc") 134 } 135 if config.UniterParams == nil { 136 return errors.NotValidf("missing UniterParams") 137 } 138 if config.Clock == nil { 139 return errors.NotValidf("missing Clock") 140 } 141 if config.PodSpecSetter == nil { 142 return errors.NotValidf("missing PodSpecSetter") 143 } 144 if config.DataDir == "" { 145 return errors.NotValidf("missing DataDir") 146 } 147 if config.Downloader == nil { 148 return errors.NotValidf("missing Downloader") 149 } 150 if config.StatusSetter == nil { 151 return errors.NotValidf("missing StatusSetter") 152 } 153 if config.VersionSetter == nil { 154 return errors.NotValidf("missing VersionSetter") 155 } 156 return nil 157 } 158 159 // NewWorker creates a new worker which will install and operate a 160 // CaaS-based application, by executing hooks and operations in 161 // response to application state changes. 162 func NewWorker(config Config) (worker.Worker, error) { 163 if err := config.Validate(); err != nil { 164 return nil, errors.Trace(err) 165 } 166 paths := NewPaths(config.DataDir, names.NewApplicationTag(config.Application)) 167 deployer, err := jujucharm.NewDeployer( 168 paths.State.CharmDir, 169 paths.State.DeployerDir, 170 jujucharm.NewBundlesDir(paths.State.BundlesDir, config.Downloader), 171 ) 172 if err != nil { 173 return nil, errors.Annotatef(err, "cannot create deployer") 174 } 175 176 op := &caasOperator{ 177 config: config, 178 paths: paths, 179 deployer: deployer, 180 runner: worker.NewRunner(worker.RunnerParams{ 181 Clock: config.Clock, 182 183 // One of the uniter workers failing should not 184 // prevent the others from running. 185 IsFatal: func(error) bool { return false }, 186 187 // For any failures, try again in 3 seconds. 188 RestartDelay: 3 * time.Second, 189 }), 190 } 191 if err := catacomb.Invoke(catacomb.Plan{ 192 Site: &op.catacomb, 193 Work: op.loop, 194 Init: []worker.Worker{op.runner}, 195 }); err != nil { 196 return nil, errors.Trace(err) 197 } 198 return op, nil 199 } 200 201 func (op *caasOperator) makeAgentSymlinks(unitTag names.UnitTag) error { 202 // All units share the same charm and agent binary. 203 // (but with different state dirs for each unit). 204 // Set up the required symlinks. 205 206 // First the agent binary. 207 agentBinaryDir := op.paths.GetToolsDir() 208 unitToolsDir := filepath.Join(agentBinaryDir, unitTag.String()) 209 err := os.Mkdir(unitToolsDir, 0600) 210 if err != nil && !os.IsExist(err) { 211 return errors.Trace(err) 212 } 213 jujudPath := filepath.Join(agentBinaryDir, jujunames.Jujud) 214 err = symlink.New(jujudPath, filepath.Join(unitToolsDir, jujunames.Jujud)) 215 // Ignore permission denied as this won't happen in production 216 // but may happen in testing depending on setup of /tmp 217 if err != nil && !os.IsExist(err) && !os.IsPermission(err) { 218 return errors.Trace(err) 219 } 220 221 // TODO(caas) - remove this when upstream charmhelpers are fixed 222 // Charmhelpers expect to see a jujud in a machine-X directory. 223 legacyMachineDir := filepath.Join(agentBinaryDir, "machine-0") 224 err = os.Mkdir(legacyMachineDir, 0600) 225 if err != nil && !os.IsExist(err) { 226 return errors.Trace(err) 227 } 228 err = symlink.New(jujudPath, filepath.Join(legacyMachineDir, jujunames.Jujud)) 229 if err != nil && !os.IsExist(err) && !os.IsPermission(err) { 230 return errors.Trace(err) 231 } 232 233 // Second the charm directory. 234 unitAgentDir := filepath.Join(op.config.DataDir, "agents", unitTag.String()) 235 err = os.MkdirAll(unitAgentDir, 0600) 236 if err != nil && !os.IsExist(err) { 237 return errors.Trace(err) 238 } 239 agentCharmDir := op.paths.GetCharmDir() 240 err = symlink.New(agentCharmDir, filepath.Join(unitAgentDir, "charm")) 241 // Ignore permission denied as this won't happen in production 242 // but may happen in testing depending on setup of /tmp 243 if err != nil && !os.IsExist(err) && !os.IsPermission(err) { 244 return errors.Trace(err) 245 } 246 return nil 247 } 248 249 func (op *caasOperator) removeUnitDir(unitTag names.UnitTag) error { 250 unitAgentDir := filepath.Join(op.config.DataDir, "agents", unitTag.String()) 251 return os.RemoveAll(unitAgentDir) 252 } 253 254 func toBinaryVersion(vers version.Number) version.Binary { 255 outVers := version.Binary{ 256 Number: vers, 257 Arch: arch.HostArch(), 258 Series: series.MustHostSeries(), 259 } 260 return outVers 261 } 262 263 func (op *caasOperator) init() (*LocalState, error) { 264 if err := jujucharm.ClearDownloads(op.paths.State.BundlesDir); err != nil { 265 logger.Warningf(err.Error()) 266 } 267 268 op.stateFile = NewStateFile(op.paths.State.OperationsFile) 269 localState, err := op.stateFile.Read() 270 if err == ErrNoStateFile { 271 localState = &LocalState{} 272 } 273 274 if err := op.ensureCharm(localState); err != nil { 275 if err == jworker.ErrTerminateAgent { 276 return nil, err 277 } 278 return nil, errors.Annotatef(err, 279 "failed to initialize caasoperator for %q", 280 op.config.Application, 281 ) 282 } 283 return localState, nil 284 } 285 286 func (op *caasOperator) loop() (err error) { 287 localState, err := op.init() 288 if err != nil { 289 return err 290 } 291 logger.Infof("operator %q started", op.config.Application) 292 293 // Start by reporting current tools (which includes arch/series). 294 if err := op.config.VersionSetter.SetVersion( 295 op.config.Application, toBinaryVersion(jujuversion.Current)); err != nil { 296 return errors.Annotate(err, "cannot set agent version") 297 } 298 299 var ( 300 watcher remotestate.Watcher 301 watcherMu sync.Mutex 302 ) 303 304 restartWatcher := func() error { 305 watcherMu.Lock() 306 defer watcherMu.Unlock() 307 308 if watcher != nil { 309 // watcher added to catacomb, will kill operator if there's an error. 310 worker.Stop(watcher) 311 } 312 var err error 313 watcher, err = remotestate.NewWatcher( 314 remotestate.WatcherConfig{ 315 CharmGetter: op.config.CharmGetter, 316 Application: op.config.Application, 317 ApplicationWatcher: op.config.ApplicationWatcher, 318 }) 319 if err != nil { 320 return errors.Trace(err) 321 } 322 if err := op.catacomb.Add(watcher); err != nil { 323 return errors.Trace(err) 324 } 325 return nil 326 } 327 328 jujuUnitsWatcher, err := op.config.UnitGetter.WatchUnits(op.config.Application) 329 if err != nil { 330 return errors.Trace(err) 331 } 332 op.catacomb.Add(jujuUnitsWatcher) 333 334 if err := op.setStatus(status.Active, ""); err != nil { 335 return errors.Trace(err) 336 } 337 338 // Keep a record of the alive units an a channel used to notify 339 // their uniter workers when the charm version has changed. 340 aliveUnits := make(map[string]chan struct{}) 341 342 if err = restartWatcher(); err != nil { 343 err = errors.Annotate(err, "(re)starting watcher") 344 return errors.Trace(err) 345 } 346 347 // We should not do anything until there has been a change 348 // to the remote state. The watcher will trigger at least 349 // once initially. 350 select { 351 case <-op.catacomb.Dying(): 352 return op.catacomb.ErrDying() 353 case <-watcher.RemoteStateChanged(): 354 } 355 356 for { 357 select { 358 case <-op.catacomb.Dying(): 359 return op.catacomb.ErrDying() 360 case <-watcher.RemoteStateChanged(): 361 snap := watcher.Snapshot() 362 if charmModified(localState, snap) { 363 // Charm changed so download and install the new version. 364 err := op.ensureCharm(localState) 365 if err != nil { 366 return errors.Annotatef(err, "error downloading updated charm %v", localState.CharmURL) 367 } 368 // Reset the application's "Downloading..." message. 369 if err := op.setStatus(status.Active, ""); err != nil { 370 return errors.Trace(err) 371 } 372 // Notify all uniters of the change so they run the upgrade-charm hook. 373 for unitId, changedChan := range aliveUnits { 374 logger.Debugf("trigger upgrade charm for caas unit %v", unitId) 375 select { 376 case <-op.catacomb.Dying(): 377 return op.catacomb.ErrDying() 378 case changedChan <- struct{}{}: 379 } 380 } 381 } 382 case units, ok := <-jujuUnitsWatcher.Changes(): 383 if !ok { 384 return errors.New("watcher closed channel") 385 } 386 for _, unitId := range units { 387 unitLife, err := op.config.UnitGetter.Life(unitId) 388 if err != nil && !errors.IsNotFound(err) { 389 return errors.Trace(err) 390 } 391 unitTag := names.NewUnitTag(unitId) 392 if errors.IsNotFound(err) || unitLife == life.Dead { 393 delete(aliveUnits, unitId) 394 if err := op.runner.StopWorker(unitId); err != nil { 395 return err 396 } 397 // Remove the unit's directory 398 if err := op.removeUnitDir(unitTag); err != nil { 399 return err 400 } 401 // Remove the unit from state. 402 if err := op.config.UnitRemover.RemoveUnit(unitId); err != nil { 403 return err 404 } 405 } else { 406 aliveUnits[unitId] = make(chan struct{}) 407 } 408 // Start a worker to manage any new units. 409 if _, err := op.runner.Worker(unitId, op.catacomb.Dying()); err == nil || unitLife == life.Dead { 410 // Already watching the unit. or we're 411 // not yet watching it and it's dead. 412 continue 413 } 414 415 // Make all the required symlinks. 416 if err := op.makeAgentSymlinks(unitTag); err != nil { 417 return errors.Trace(err) 418 } 419 420 params := op.config.UniterParams 421 params.UnitTag = unitTag 422 params.UniterFacade = op.config.UniterFacadeFunc(unitTag) 423 params.LeadershipTracker = op.config.LeadershipTrackerFunc(unitTag) 424 params.ApplicationChannel = aliveUnits[unitId] 425 if err := op.config.StartUniterFunc(op.runner, params); err != nil { 426 return errors.Trace(err) 427 } 428 } 429 } 430 } 431 } 432 433 func charmModified(local *LocalState, remote remotestate.Snapshot) bool { 434 // CAAS models may not yet have read the charm url from state. 435 if remote.CharmURL == nil { 436 return false 437 } 438 if local == nil || local.CharmURL == nil { 439 logger.Warningf("unexpected nil local charm URL") 440 return true 441 } 442 if *local.CharmURL != *remote.CharmURL { 443 logger.Debugf("upgrade from %v to %v", local.CharmURL, remote.CharmURL) 444 return true 445 } 446 447 if local.CharmModifiedVersion != remote.CharmModifiedVersion { 448 logger.Debugf("upgrade from CharmModifiedVersion %v to %v", local.CharmModifiedVersion, remote.CharmModifiedVersion) 449 return true 450 } 451 if remote.ForceCharmUpgrade { 452 logger.Debugf("force charm upgrade to %v", remote.CharmURL) 453 return true 454 } 455 return false 456 } 457 458 func (op *caasOperator) setStatus(status status.Status, message string, args ...interface{}) error { 459 err := op.config.StatusSetter.SetStatus( 460 op.config.Application, 461 status, 462 fmt.Sprintf(message, args...), 463 nil, 464 ) 465 return errors.Annotate(err, "setting status") 466 } 467 468 // Kill is part of the worker.Worker interface. 469 func (op *caasOperator) Kill() { 470 op.catacomb.Kill(nil) 471 } 472 473 // Wait is part of the worker.Worker interface. 474 func (op *caasOperator) Wait() error { 475 return op.catacomb.Wait() 476 }