github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/cmd/jujud/agent/machine.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package agent 5 6 import ( 7 "fmt" 8 "io" 9 "net" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strconv" 14 "sync" 15 "time" 16 17 "github.com/juju/cmd" 18 "github.com/juju/errors" 19 apiagent "github.com/juju/juju/api/agent" 20 apimachiner "github.com/juju/juju/api/machiner" 21 "github.com/juju/loggo" 22 "github.com/juju/names" 23 "github.com/juju/replicaset" 24 "github.com/juju/utils" 25 "github.com/juju/utils/clock" 26 "github.com/juju/utils/featureflag" 27 "github.com/juju/utils/series" 28 "github.com/juju/utils/set" 29 "github.com/juju/utils/symlink" 30 "github.com/juju/utils/voyeur" 31 "github.com/juju/version" 32 "gopkg.in/juju/charmrepo.v2-unstable" 33 "gopkg.in/mgo.v2" 34 "gopkg.in/natefinch/lumberjack.v2" 35 "launchpad.net/gnuflag" 36 "launchpad.net/tomb" 37 38 "github.com/juju/juju/agent" 39 "github.com/juju/juju/agent/tools" 40 "github.com/juju/juju/api" 41 apideployer "github.com/juju/juju/api/deployer" 42 "github.com/juju/juju/api/metricsmanager" 43 "github.com/juju/juju/apiserver" 44 "github.com/juju/juju/apiserver/params" 45 "github.com/juju/juju/cert" 46 "github.com/juju/juju/cmd/jujud/agent/machine" 47 "github.com/juju/juju/cmd/jujud/agent/model" 48 "github.com/juju/juju/cmd/jujud/reboot" 49 cmdutil "github.com/juju/juju/cmd/jujud/util" 50 "github.com/juju/juju/container" 51 "github.com/juju/juju/container/kvm" 52 "github.com/juju/juju/environs" 53 "github.com/juju/juju/environs/simplestreams" 54 "github.com/juju/juju/instance" 55 jujunames "github.com/juju/juju/juju/names" 56 "github.com/juju/juju/juju/paths" 57 "github.com/juju/juju/mongo" 58 "github.com/juju/juju/network" 59 "github.com/juju/juju/service" 60 "github.com/juju/juju/service/common" 61 "github.com/juju/juju/state" 62 "github.com/juju/juju/state/multiwatcher" 63 "github.com/juju/juju/storage/looputil" 64 "github.com/juju/juju/upgrades" 65 jujuversion "github.com/juju/juju/version" 66 "github.com/juju/juju/watcher" 67 "github.com/juju/juju/worker" 68 "github.com/juju/juju/worker/apicaller" 69 "github.com/juju/juju/worker/certupdater" 70 "github.com/juju/juju/worker/conv2state" 71 "github.com/juju/juju/worker/dblogpruner" 72 "github.com/juju/juju/worker/dependency" 73 "github.com/juju/juju/worker/deployer" 74 "github.com/juju/juju/worker/gate" 75 "github.com/juju/juju/worker/imagemetadataworker" 76 "github.com/juju/juju/worker/logsender" 77 "github.com/juju/juju/worker/modelworkermanager" 78 "github.com/juju/juju/worker/mongoupgrader" 79 "github.com/juju/juju/worker/peergrouper" 80 "github.com/juju/juju/worker/provisioner" 81 "github.com/juju/juju/worker/singular" 82 "github.com/juju/juju/worker/txnpruner" 83 "github.com/juju/juju/worker/upgradesteps" 84 ) 85 86 var ( 87 logger = loggo.GetLogger("juju.cmd.jujud") 88 jujuRun = paths.MustSucceed(paths.JujuRun(series.HostSeries())) 89 jujuDumpLogs = paths.MustSucceed(paths.JujuDumpLogs(series.HostSeries())) 90 91 // The following are defined as variables to allow the tests to 92 // intercept calls to the functions. 93 useMultipleCPUs = utils.UseMultipleCPUs 94 modelManifolds = model.Manifolds 95 newSingularRunner = singular.New 96 peergrouperNew = peergrouper.New 97 newCertificateUpdater = certupdater.NewCertificateUpdater 98 newMetadataUpdater = imagemetadataworker.NewWorker 99 newUpgradeMongoWorker = mongoupgrader.New 100 reportOpenedState = func(io.Closer) {} 101 ) 102 103 // Variable to override in tests, default is true 104 var ProductionMongoWriteConcern = true 105 106 func init() { 107 stateWorkerDialOpts = mongo.DefaultDialOpts() 108 stateWorkerDialOpts.PostDial = func(session *mgo.Session) error { 109 safe := mgo.Safe{} 110 if ProductionMongoWriteConcern { 111 safe.J = true 112 _, err := replicaset.CurrentConfig(session) 113 if err == nil { 114 // set mongo to write-majority (writes only returned after 115 // replicated to a majority of replica-set members). 116 safe.WMode = "majority" 117 } 118 } 119 session.SetSafe(&safe) 120 return nil 121 } 122 } 123 124 // AgentInitializer handles initializing a type for use as a Jujud 125 // agent. 126 type AgentInitializer interface { 127 AddFlags(*gnuflag.FlagSet) 128 CheckArgs([]string) error 129 } 130 131 // AgentConfigWriter encapsulates disk I/O operations with the agent 132 // config. 133 type AgentConfigWriter interface { 134 // ReadConfig reads the config for the given tag from disk. 135 ReadConfig(tag string) error 136 // ChangeConfig executes the given agent.ConfigMutator in a 137 // thread-safe context. 138 ChangeConfig(agent.ConfigMutator) error 139 // CurrentConfig returns a copy of the in-memory agent config. 140 CurrentConfig() agent.Config 141 } 142 143 // NewMachineAgentCmd creates a Command which handles parsing 144 // command-line arguments and instantiating and running a 145 // MachineAgent. 146 func NewMachineAgentCmd( 147 ctx *cmd.Context, 148 machineAgentFactory func(string) *MachineAgent, 149 agentInitializer AgentInitializer, 150 configFetcher AgentConfigWriter, 151 ) cmd.Command { 152 return &machineAgentCmd{ 153 ctx: ctx, 154 machineAgentFactory: machineAgentFactory, 155 agentInitializer: agentInitializer, 156 currentConfig: configFetcher, 157 } 158 } 159 160 type machineAgentCmd struct { 161 cmd.CommandBase 162 163 // This group of arguments is required. 164 agentInitializer AgentInitializer 165 currentConfig AgentConfigWriter 166 machineAgentFactory func(string) *MachineAgent 167 ctx *cmd.Context 168 169 // This group is for debugging purposes. 170 logToStdErr bool 171 172 // The following are set via command-line flags. 173 machineId string 174 } 175 176 // Init is called by the cmd system to initialize the structure for 177 // running. 178 func (a *machineAgentCmd) Init(args []string) error { 179 180 if !names.IsValidMachine(a.machineId) { 181 return fmt.Errorf("--machine-id option must be set, and expects a non-negative integer") 182 } 183 if err := a.agentInitializer.CheckArgs(args); err != nil { 184 return err 185 } 186 187 // Due to changes in the logging, and needing to care about old 188 // models that have been upgraded, we need to explicitly remove the 189 // file writer if one has been added, otherwise we will get duplicate 190 // lines of all logging in the log file. 191 loggo.RemoveWriter("logfile") 192 193 if a.logToStdErr { 194 return nil 195 } 196 197 err := a.currentConfig.ReadConfig(names.NewMachineTag(a.machineId).String()) 198 if err != nil { 199 return errors.Annotate(err, "cannot read agent configuration") 200 } 201 202 // the context's stderr is set as the loggo writer in github.com/juju/cmd/logging.go 203 a.ctx.Stderr = &lumberjack.Logger{ 204 Filename: agent.LogFilename(a.currentConfig.CurrentConfig()), 205 MaxSize: 300, // megabytes 206 MaxBackups: 2, 207 } 208 209 return nil 210 } 211 212 // Run instantiates a MachineAgent and runs it. 213 func (a *machineAgentCmd) Run(c *cmd.Context) error { 214 machineAgent := a.machineAgentFactory(a.machineId) 215 return machineAgent.Run(c) 216 } 217 218 // SetFlags adds the requisite flags to run this command. 219 func (a *machineAgentCmd) SetFlags(f *gnuflag.FlagSet) { 220 a.agentInitializer.AddFlags(f) 221 f.StringVar(&a.machineId, "machine-id", "", "id of the machine to run") 222 } 223 224 // Info returns usage information for the command. 225 func (a *machineAgentCmd) Info() *cmd.Info { 226 return &cmd.Info{ 227 Name: "machine", 228 Purpose: "run a juju machine agent", 229 } 230 } 231 232 // MachineAgentFactoryFn returns a function which instantiates a 233 // MachineAgent given a machineId. 234 func MachineAgentFactoryFn( 235 agentConfWriter AgentConfigWriter, 236 bufferedLogs logsender.LogRecordCh, 237 rootDir string, 238 ) func(string) *MachineAgent { 239 return func(machineId string) *MachineAgent { 240 return NewMachineAgent( 241 machineId, 242 agentConfWriter, 243 bufferedLogs, 244 worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant, worker.RestartDelay), 245 looputil.NewLoopDeviceManager(), 246 rootDir, 247 ) 248 } 249 } 250 251 // NewMachineAgent instantiates a new MachineAgent. 252 func NewMachineAgent( 253 machineId string, 254 agentConfWriter AgentConfigWriter, 255 bufferedLogs logsender.LogRecordCh, 256 runner worker.Runner, 257 loopDeviceManager looputil.LoopDeviceManager, 258 rootDir string, 259 ) *MachineAgent { 260 return &MachineAgent{ 261 machineId: machineId, 262 AgentConfigWriter: agentConfWriter, 263 configChangedVal: voyeur.NewValue(true), 264 bufferedLogs: bufferedLogs, 265 workersStarted: make(chan struct{}), 266 runner: runner, 267 rootDir: rootDir, 268 initialUpgradeCheckComplete: gate.NewLock(), 269 loopDeviceManager: loopDeviceManager, 270 } 271 } 272 273 // MachineAgent is responsible for tying together all functionality 274 // needed to orchestrate a Jujud instance which controls a machine. 275 type MachineAgent struct { 276 AgentConfigWriter 277 278 tomb tomb.Tomb 279 machineId string 280 runner worker.Runner 281 rootDir string 282 bufferedLogs logsender.LogRecordCh 283 configChangedVal *voyeur.Value 284 upgradeComplete gate.Lock 285 workersStarted chan struct{} 286 287 // XXX(fwereade): these smell strongly of goroutine-unsafeness. 288 restoreMode bool 289 restoring bool 290 291 // Used to signal that the upgrade worker will not 292 // reboot the agent on startup because there are no 293 // longer any immediately pending agent upgrades. 294 initialUpgradeCheckComplete gate.Lock 295 296 discoverSpacesComplete gate.Lock 297 298 mongoInitMutex sync.Mutex 299 mongoInitialized bool 300 301 loopDeviceManager looputil.LoopDeviceManager 302 } 303 304 // IsRestorePreparing returns bool representing if we are in restore mode 305 // but not running restore. 306 func (a *MachineAgent) IsRestorePreparing() bool { 307 return a.restoreMode && !a.restoring 308 } 309 310 // IsRestoreRunning returns bool representing if we are in restore mode 311 // and running the actual restore process. 312 func (a *MachineAgent) IsRestoreRunning() bool { 313 return a.restoring 314 } 315 316 func (a *MachineAgent) isUpgradeRunning() bool { 317 return !a.upgradeComplete.IsUnlocked() 318 } 319 320 func (a *MachineAgent) isInitialUpgradeCheckPending() bool { 321 return !a.initialUpgradeCheckComplete.IsUnlocked() 322 } 323 324 // Wait waits for the machine agent to finish. 325 func (a *MachineAgent) Wait() error { 326 return a.tomb.Wait() 327 } 328 329 // Stop stops the machine agent. 330 func (a *MachineAgent) Stop() error { 331 a.runner.Kill() 332 return a.tomb.Wait() 333 } 334 335 // upgradeCertificateDNSNames ensure that the controller certificate 336 // recorded in the agent config and also mongo server.pem contains the 337 // DNSNames entires required by Juju/ 338 func (a *MachineAgent) upgradeCertificateDNSNames() error { 339 agentConfig := a.CurrentConfig() 340 si, ok := agentConfig.StateServingInfo() 341 if !ok || si.CAPrivateKey == "" { 342 // No certificate information exists yet, nothing to do. 343 return nil 344 } 345 // Parse the current certificate to get the current dns names. 346 serverCert, err := cert.ParseCert(si.Cert) 347 if err != nil { 348 return err 349 } 350 update := false 351 dnsNames := set.NewStrings(serverCert.DNSNames...) 352 requiredDNSNames := []string{"local", "juju-apiserver", "juju-mongodb"} 353 for _, dnsName := range requiredDNSNames { 354 if dnsNames.Contains(dnsName) { 355 continue 356 } 357 dnsNames.Add(dnsName) 358 update = true 359 } 360 if !update { 361 return nil 362 } 363 // Write a new certificate to the mongo pem and agent config files. 364 si.Cert, si.PrivateKey, err = cert.NewDefaultServer(agentConfig.CACert(), si.CAPrivateKey, dnsNames.Values()) 365 if err != nil { 366 return err 367 } 368 if err := mongo.UpdateSSLKey(agentConfig.DataDir(), si.Cert, si.PrivateKey); err != nil { 369 return err 370 } 371 return a.AgentConfigWriter.ChangeConfig(func(config agent.ConfigSetter) error { 372 config.SetStateServingInfo(si) 373 return nil 374 }) 375 } 376 377 // Run runs a machine agent. 378 func (a *MachineAgent) Run(*cmd.Context) error { 379 380 defer a.tomb.Done() 381 if err := a.ReadConfig(a.Tag().String()); err != nil { 382 return fmt.Errorf("cannot read agent configuration: %v", err) 383 } 384 385 logger.Infof("machine agent %v start (%s [%s])", a.Tag(), jujuversion.Current, runtime.Compiler) 386 if flags := featureflag.String(); flags != "" { 387 logger.Warningf("developer feature flags enabled: %s", flags) 388 } 389 390 // Before doing anything else, we need to make sure the certificate generated for 391 // use by mongo to validate controller connections is correct. This needs to be done 392 // before any possible restart of the mongo service. 393 // See bug http://pad.lv/1434680 394 if err := a.upgradeCertificateDNSNames(); err != nil { 395 return errors.Annotate(err, "error upgrading server certificate") 396 } 397 398 if upgradeComplete, err := upgradesteps.NewLock(a); err != nil { 399 return errors.Annotate(err, "error during creating upgrade completion channel") 400 } else { 401 a.upgradeComplete = upgradeComplete 402 } 403 404 agentConfig := a.CurrentConfig() 405 createEngine := a.makeEngineCreator(agentConfig.UpgradedToVersion()) 406 network.SetPreferIPv6(agentConfig.PreferIPv6()) 407 charmrepo.CacheDir = filepath.Join(agentConfig.DataDir(), "charmcache") 408 if err := a.createJujudSymlinks(agentConfig.DataDir()); err != nil { 409 return err 410 } 411 a.runner.StartWorker("engine", createEngine) 412 413 // At this point, all workers will have been configured to start 414 close(a.workersStarted) 415 err := a.runner.Wait() 416 switch errors.Cause(err) { 417 case worker.ErrTerminateAgent: 418 err = a.uninstallAgent() 419 case worker.ErrRebootMachine: 420 logger.Infof("Caught reboot error") 421 err = a.executeRebootOrShutdown(params.ShouldReboot) 422 case worker.ErrShutdownMachine: 423 logger.Infof("Caught shutdown error") 424 err = a.executeRebootOrShutdown(params.ShouldShutdown) 425 } 426 err = cmdutil.AgentDone(logger, err) 427 a.tomb.Kill(err) 428 return err 429 } 430 431 func (a *MachineAgent) makeEngineCreator(previousAgentVersion version.Number) func() (worker.Worker, error) { 432 return func() (worker.Worker, error) { 433 config := dependency.EngineConfig{ 434 IsFatal: cmdutil.IsFatal, 435 WorstError: cmdutil.MoreImportantError, 436 ErrorDelay: 3 * time.Second, 437 BounceDelay: 10 * time.Millisecond, 438 } 439 engine, err := dependency.NewEngine(config) 440 if err != nil { 441 return nil, err 442 } 443 manifolds := machine.Manifolds(machine.ManifoldsConfig{ 444 PreviousAgentVersion: previousAgentVersion, 445 Agent: agent.APIHostPortsSetter{Agent: a}, 446 RootDir: a.rootDir, 447 AgentConfigChanged: a.configChangedVal, 448 UpgradeStepsLock: a.upgradeComplete, 449 UpgradeCheckLock: a.initialUpgradeCheckComplete, 450 OpenState: a.initState, 451 OpenStateForUpgrade: a.openStateForUpgrade, 452 StartStateWorkers: a.startStateWorkers, 453 StartAPIWorkers: a.startAPIWorkers, 454 PreUpgradeSteps: upgrades.PreUpgradeSteps, 455 LogSource: a.bufferedLogs, 456 NewDeployContext: newDeployContext, 457 Clock: clock.WallClock, 458 }) 459 if err := dependency.Install(engine, manifolds); err != nil { 460 if err := worker.Stop(engine); err != nil { 461 logger.Errorf("while stopping engine with bad manifolds: %v", err) 462 } 463 return nil, err 464 } 465 return engine, nil 466 } 467 } 468 469 func (a *MachineAgent) executeRebootOrShutdown(action params.RebootAction) error { 470 // At this stage, all API connections would have been closed 471 // We need to reopen the API to clear the reboot flag after 472 // scheduling the reboot. It may be cleaner to do this in the reboot 473 // worker, before returning the ErrRebootMachine. 474 conn, err := apicaller.OnlyConnect(a, apicaller.APIOpen) 475 if err != nil { 476 logger.Infof("Reboot: Error connecting to state") 477 return errors.Trace(err) 478 } 479 480 // block until all units/containers are ready, and reboot/shutdown 481 finalize, err := reboot.NewRebootWaiter(conn, a.CurrentConfig()) 482 if err != nil { 483 return errors.Trace(err) 484 } 485 486 logger.Infof("Reboot: Executing reboot") 487 err = finalize.ExecuteReboot(action) 488 if err != nil { 489 logger.Infof("Reboot: Error executing reboot: %v", err) 490 return errors.Trace(err) 491 } 492 // On windows, the shutdown command is asynchronous. We return ErrRebootMachine 493 // so the agent will simply exit without error pending reboot/shutdown. 494 return worker.ErrRebootMachine 495 } 496 497 func (a *MachineAgent) ChangeConfig(mutate agent.ConfigMutator) error { 498 err := a.AgentConfigWriter.ChangeConfig(mutate) 499 a.configChangedVal.Set(true) 500 return errors.Trace(err) 501 } 502 503 func (a *MachineAgent) maybeStopMongo(ver mongo.Version, isMaster bool) error { 504 if !a.mongoInitialized { 505 return nil 506 } 507 508 conf := a.AgentConfigWriter.CurrentConfig() 509 v := conf.MongoVersion() 510 511 logger.Errorf("Got version change %v", ver) 512 // TODO(perrito666) replace with "read-only" mode for environment when 513 // it is available. 514 if ver.NewerThan(v) > 0 { 515 err := a.AgentConfigWriter.ChangeConfig(func(config agent.ConfigSetter) error { 516 config.SetMongoVersion(mongo.MongoUpgrade) 517 return nil 518 }) 519 if err != nil { 520 return err 521 } 522 523 } 524 return nil 525 526 } 527 528 // PrepareRestore will flag the agent to allow only a limited set 529 // of commands defined in 530 // "github.com/juju/juju/apiserver".allowedMethodsAboutToRestore 531 // the most noteworthy is: 532 // Backups.Restore: this will ensure that we can do all the file movements 533 // required for restore and no one will do changes while we do that. 534 // it will return error if the machine is already in this state. 535 func (a *MachineAgent) PrepareRestore() error { 536 if a.restoreMode { 537 return errors.Errorf("already in restore mode") 538 } 539 a.restoreMode = true 540 return nil 541 } 542 543 // BeginRestore will flag the agent to disallow all commands since 544 // restore should be running and therefore making changes that 545 // would override anything done. 546 func (a *MachineAgent) BeginRestore() error { 547 switch { 548 case !a.restoreMode: 549 return errors.Errorf("not in restore mode, cannot begin restoration") 550 case a.restoring: 551 return errors.Errorf("already restoring") 552 } 553 a.restoring = true 554 return nil 555 } 556 557 // EndRestore will flag the agent to allow all commands 558 // This being invoked means that restore process failed 559 // since success restarts the agent. 560 func (a *MachineAgent) EndRestore() { 561 a.restoreMode = false 562 a.restoring = false 563 } 564 565 // newRestoreStateWatcherWorker will return a worker or err if there 566 // is a failure, the worker takes care of watching the state of 567 // restoreInfo doc and put the agent in the different restore modes. 568 func (a *MachineAgent) newRestoreStateWatcherWorker(st *state.State) (worker.Worker, error) { 569 rWorker := func(stopch <-chan struct{}) error { 570 return a.restoreStateWatcher(st, stopch) 571 } 572 return worker.NewSimpleWorker(rWorker), nil 573 } 574 575 // restoreChanged will be called whenever restoreInfo doc changes signaling a new 576 // step in the restore process. 577 func (a *MachineAgent) restoreChanged(st *state.State) error { 578 rinfo, err := st.RestoreInfoSetter() 579 if err != nil { 580 return errors.Annotate(err, "cannot read restore state") 581 } 582 switch rinfo.Status() { 583 case state.RestorePending: 584 a.PrepareRestore() 585 case state.RestoreInProgress: 586 a.BeginRestore() 587 case state.RestoreFailed: 588 a.EndRestore() 589 } 590 return nil 591 } 592 593 // restoreStateWatcher watches for restoreInfo looking for changes in the restore process. 594 func (a *MachineAgent) restoreStateWatcher(st *state.State, stopch <-chan struct{}) error { 595 restoreWatch := st.WatchRestoreInfoChanges() 596 defer func() { 597 restoreWatch.Kill() 598 restoreWatch.Wait() 599 }() 600 601 for { 602 select { 603 case <-restoreWatch.Changes(): 604 if err := a.restoreChanged(st); err != nil { 605 return err 606 } 607 case <-stopch: 608 return nil 609 } 610 } 611 } 612 613 var newEnvirons = environs.New 614 615 // startAPIWorkers is called to start workers which rely on the 616 // machine agent's API connection (via the apiworkers manifold). It 617 // returns a Runner with a number of workers attached to it. 618 // 619 // The workers started here need to be converted to run under the 620 // dependency engine. Once they have all been converted, this method - 621 // and the apiworkers manifold - can be removed. 622 func (a *MachineAgent) startAPIWorkers(apiConn api.Connection) (_ worker.Worker, outErr error) { 623 agentConfig := a.CurrentConfig() 624 625 entity, err := apiagent.NewState(apiConn).Entity(a.Tag()) 626 if err != nil { 627 return nil, errors.Trace(err) 628 } 629 630 var isModelManager bool 631 for _, job := range entity.Jobs() { 632 switch job { 633 case multiwatcher.JobManageModel: 634 isModelManager = true 635 default: 636 // TODO(dimitern): Once all workers moved over to using 637 // the API, report "unknown job type" here. 638 } 639 } 640 641 runner := newConnRunner(apiConn) 642 defer func() { 643 // If startAPIWorkers exits early with an error, stop the 644 // runner so that any already started runners aren't leaked. 645 if outErr != nil { 646 worker.Stop(runner) 647 } 648 }() 649 650 modelConfig, err := apiagent.NewState(apiConn).ModelConfig() 651 if err != nil { 652 return nil, fmt.Errorf("cannot read model config: %v", err) 653 } 654 655 // Perform the operations needed to set up hosting for containers. 656 if err := a.setupContainerSupport(runner, apiConn, agentConfig); err != nil { 657 cause := errors.Cause(err) 658 if params.IsCodeDead(cause) || cause == worker.ErrTerminateAgent { 659 return nil, worker.ErrTerminateAgent 660 } 661 return nil, fmt.Errorf("setting up container support: %v", err) 662 } 663 664 if isModelManager { 665 666 // Published image metadata for some providers are in simple streams. 667 // Providers that do not depend on simple streams do not need this worker. 668 env, err := newEnvirons(modelConfig) 669 if err != nil { 670 return nil, errors.Annotate(err, "getting environ") 671 } 672 if _, ok := env.(simplestreams.HasRegion); ok { 673 // Start worker that stores published image metadata in state. 674 runner.StartWorker("imagemetadata", func() (worker.Worker, error) { 675 return newMetadataUpdater(apiConn.MetadataUpdater()), nil 676 }) 677 } 678 679 // We don't have instance info set and the network config for the 680 // bootstrap machine only, so update it now. All the other machines will 681 // have instance info including network config set at provisioning time. 682 if err := a.setControllerNetworkConfig(apiConn); err != nil { 683 return nil, errors.Annotate(err, "setting controller network config") 684 } 685 } else { 686 runner.StartWorker("stateconverter", func() (worker.Worker, error) { 687 // TODO(fwereade): this worker needs its own facade. 688 facade := apimachiner.NewState(apiConn) 689 handler := conv2state.New(facade, a) 690 w, err := watcher.NewNotifyWorker(watcher.NotifyConfig{ 691 Handler: handler, 692 }) 693 if err != nil { 694 return nil, errors.Annotate(err, "cannot start controller promoter worker") 695 } 696 return w, nil 697 }) 698 } 699 return runner, nil 700 } 701 702 func (a *MachineAgent) setControllerNetworkConfig(apiConn api.Connection) error { 703 machinerAPI := apimachiner.NewState(apiConn) 704 agentConfig := a.CurrentConfig() 705 706 tag := agentConfig.Tag().(names.MachineTag) 707 machine, err := machinerAPI.Machine(tag) 708 if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead { 709 return worker.ErrTerminateAgent 710 } 711 if err != nil { 712 return errors.Annotatef(err, "cannot load machine %s from state", tag) 713 } 714 715 if err := machine.SetProviderNetworkConfig(); err != nil { 716 return errors.Annotate(err, "cannot set controller provider network config") 717 } 718 return nil 719 } 720 721 // Restart restarts the agent's service. 722 func (a *MachineAgent) Restart() error { 723 name := a.CurrentConfig().Value(agent.AgentServiceName) 724 return service.Restart(name) 725 } 726 727 // openStateForUpgrade exists to be passed into the upgradesteps 728 // worker. The upgradesteps worker opens state independently of the 729 // state worker so that it isn't affected by the state worker's 730 // lifetime. It ensures the MongoDB server is configured and started, 731 // and then opens a state connection. 732 // 733 // TODO(mjs)- review the need for this once the dependency engine is 734 // in use. Why can't upgradesteps depend on the main state connection? 735 func (a *MachineAgent) openStateForUpgrade() (*state.State, error) { 736 agentConfig := a.CurrentConfig() 737 if err := a.ensureMongoServer(agentConfig); err != nil { 738 return nil, errors.Trace(err) 739 } 740 info, ok := agentConfig.MongoInfo() 741 if !ok { 742 return nil, errors.New("no state info available") 743 } 744 st, err := state.Open(agentConfig.Model(), info, mongo.DefaultDialOpts(), environs.NewStatePolicy()) 745 if err != nil { 746 return nil, errors.Trace(err) 747 } 748 return st, nil 749 } 750 751 // setupContainerSupport determines what containers can be run on this machine and 752 // initialises suitable infrastructure to support such containers. 753 func (a *MachineAgent) setupContainerSupport(runner worker.Runner, st api.Connection, agentConfig agent.Config) error { 754 var supportedContainers []instance.ContainerType 755 supportsContainers := container.ContainersSupported() 756 if supportsContainers { 757 supportedContainers = append(supportedContainers, instance.LXC, instance.LXD) 758 } 759 760 supportsKvm, err := kvm.IsKVMSupported() 761 if err != nil { 762 logger.Warningf("determining kvm support: %v\nno kvm containers possible", err) 763 } 764 if err == nil && supportsKvm { 765 supportedContainers = append(supportedContainers, instance.KVM) 766 } 767 768 return a.updateSupportedContainers(runner, st, supportedContainers, agentConfig) 769 } 770 771 // updateSupportedContainers records in state that a machine can run the specified containers. 772 // It starts a watcher and when a container of a given type is first added to the machine, 773 // the watcher is killed, the machine is set up to be able to start containers of the given type, 774 // and a suitable provisioner is started. 775 func (a *MachineAgent) updateSupportedContainers( 776 runner worker.Runner, 777 st api.Connection, 778 containers []instance.ContainerType, 779 agentConfig agent.Config, 780 ) error { 781 pr := st.Provisioner() 782 tag := agentConfig.Tag().(names.MachineTag) 783 machine, err := pr.Machine(tag) 784 if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead { 785 return worker.ErrTerminateAgent 786 } 787 if err != nil { 788 return errors.Annotatef(err, "cannot load machine %s from state", tag) 789 } 790 if len(containers) == 0 { 791 if err := machine.SupportsNoContainers(); err != nil { 792 return errors.Annotatef(err, "clearing supported containers for %s", tag) 793 } 794 return nil 795 } 796 if err := machine.SetSupportedContainers(containers...); err != nil { 797 return errors.Annotatef(err, "setting supported containers for %s", tag) 798 } 799 initLock, err := cmdutil.HookExecutionLock(agentConfig.DataDir()) 800 if err != nil { 801 return err 802 } 803 // Start the watcher to fire when a container is first requested on the machine. 804 modelUUID, err := st.ModelTag() 805 if err != nil { 806 return err 807 } 808 watcherName := fmt.Sprintf("%s-container-watcher", machine.Id()) 809 // There may not be a CA certificate private key available, and without 810 // it we can't ensure that other Juju nodes can connect securely, so only 811 // use an image URL getter if there's a private key. 812 var imageURLGetter container.ImageURLGetter 813 if agentConfig.Value(agent.AllowsSecureConnection) == "true" { 814 cfg, err := pr.ModelConfig() 815 if err != nil { 816 return errors.Annotate(err, "unable to get environ config") 817 } 818 imageURLGetter = container.NewImageURLGetter( 819 // Explicitly call the non-named constructor so if anyone 820 // adds additional fields, this fails. 821 container.ImageURLGetterConfig{ 822 ServerRoot: st.Addr(), 823 ModelUUID: modelUUID.Id(), 824 CACert: []byte(agentConfig.CACert()), 825 CloudimgBaseUrl: cfg.CloudImageBaseURL(), 826 Stream: cfg.ImageStream(), 827 ImageDownloadFunc: container.ImageDownloadURL, 828 }) 829 } 830 params := provisioner.ContainerSetupParams{ 831 Runner: runner, 832 WorkerName: watcherName, 833 SupportedContainers: containers, 834 ImageURLGetter: imageURLGetter, 835 Machine: machine, 836 Provisioner: pr, 837 Config: agentConfig, 838 InitLock: initLock, 839 } 840 handler := provisioner.NewContainerSetupHandler(params) 841 a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) { 842 w, err := watcher.NewStringsWorker(watcher.StringsConfig{ 843 Handler: handler, 844 }) 845 if err != nil { 846 return nil, errors.Annotatef(err, "cannot start %s worker", watcherName) 847 } 848 return w, nil 849 }) 850 return nil 851 } 852 853 func (a *MachineAgent) initState(agentConfig agent.Config) (*state.State, error) { 854 // Start MongoDB server and dial. 855 if err := a.ensureMongoServer(agentConfig); err != nil { 856 return nil, err 857 } 858 859 st, _, err := openState(agentConfig, stateWorkerDialOpts) 860 if err != nil { 861 return nil, err 862 } 863 864 reportOpenedState(st) 865 866 return st, nil 867 } 868 869 // startStateWorkers returns a worker running all the workers that 870 // require a *state.State connection. 871 func (a *MachineAgent) startStateWorkers(st *state.State) (worker.Worker, error) { 872 agentConfig := a.CurrentConfig() 873 874 m, err := getMachine(st, agentConfig.Tag()) 875 if err != nil { 876 return nil, errors.Annotate(err, "machine lookup") 877 } 878 879 runner := newConnRunner(st) 880 singularRunner, err := newSingularStateRunner(runner, st, m) 881 if err != nil { 882 return nil, errors.Trace(err) 883 } 884 885 for _, job := range m.Jobs() { 886 switch job { 887 case state.JobHostUnits: 888 // Implemented elsewhere with workers that use the API. 889 case state.JobManageNetworking: 890 // Not used by state workers. 891 case state.JobManageModel: 892 useMultipleCPUs() 893 a.startWorkerAfterUpgrade(runner, "model worker manager", func() (worker.Worker, error) { 894 w, err := modelworkermanager.New(modelworkermanager.Config{ 895 Backend: st, 896 NewWorker: a.startModelWorkers, 897 ErrorDelay: worker.RestartDelay, 898 }) 899 if err != nil { 900 return nil, errors.Annotate(err, "cannot start model worker manager") 901 } 902 return w, nil 903 }) 904 a.startWorkerAfterUpgrade(runner, "peergrouper", func() (worker.Worker, error) { 905 w, err := peergrouperNew(st) 906 if err != nil { 907 return nil, errors.Annotate(err, "cannot start peergrouper worker") 908 } 909 return w, nil 910 }) 911 a.startWorkerAfterUpgrade(runner, "restore", func() (worker.Worker, error) { 912 w, err := a.newRestoreStateWatcherWorker(st) 913 if err != nil { 914 return nil, errors.Annotate(err, "cannot start backup-restorer worker") 915 } 916 return w, nil 917 }) 918 a.startWorkerAfterUpgrade(runner, "mongoupgrade", func() (worker.Worker, error) { 919 return newUpgradeMongoWorker(st, a.machineId, a.maybeStopMongo) 920 }) 921 922 // certChangedChan is shared by multiple workers it's up 923 // to the agent to close it rather than any one of the 924 // workers. It is possible that multiple cert changes 925 // come in before the apiserver is up to receive them. 926 // Specify a bigger buffer to prevent deadlock when 927 // the apiserver isn't up yet. Use a size of 10 since we 928 // allow up to 7 controllers, and might also update the 929 // addresses of the local machine (127.0.0.1, ::1, etc). 930 // 931 // TODO(cherylj/waigani) Remove this workaround when 932 // certupdater and apiserver can properly manage dependencies 933 // through the dependency engine. 934 // 935 // TODO(ericsnow) For now we simply do not close the channel. 936 certChangedChan := make(chan params.StateServingInfo, 10) 937 // Each time apiserver worker is restarted, we need a fresh copy of state due 938 // to the fact that state holds lease managers which are killed and need to be reset. 939 stateOpener := func() (*state.State, error) { 940 logger.Debugf("opening state for apiserver worker") 941 st, _, err := openState(agentConfig, stateWorkerDialOpts) 942 return st, err 943 } 944 runner.StartWorker("apiserver", a.apiserverWorkerStarter(stateOpener, certChangedChan)) 945 var stateServingSetter certupdater.StateServingInfoSetter = func(info params.StateServingInfo, done <-chan struct{}) error { 946 return a.ChangeConfig(func(config agent.ConfigSetter) error { 947 config.SetStateServingInfo(info) 948 logger.Infof("update apiserver worker with new certificate") 949 select { 950 case certChangedChan <- info: 951 return nil 952 case <-done: 953 return nil 954 } 955 }) 956 } 957 a.startWorkerAfterUpgrade(runner, "certupdater", func() (worker.Worker, error) { 958 return newCertificateUpdater(m, agentConfig, st, st, stateServingSetter), nil 959 }) 960 961 a.startWorkerAfterUpgrade(singularRunner, "dblogpruner", func() (worker.Worker, error) { 962 return dblogpruner.New(st, dblogpruner.NewLogPruneParams()), nil 963 }) 964 965 a.startWorkerAfterUpgrade(singularRunner, "txnpruner", func() (worker.Worker, error) { 966 return txnpruner.New(st, time.Hour*2), nil 967 }) 968 default: 969 return nil, errors.Errorf("unknown job type %q", job) 970 } 971 } 972 return runner, nil 973 } 974 975 // startModelWorkers starts the set of workers that run for every model 976 // in each controller. 977 func (a *MachineAgent) startModelWorkers(uuid string) (worker.Worker, error) { 978 modelAgent, err := model.WrapAgent(a, uuid) 979 if err != nil { 980 return nil, errors.Trace(err) 981 } 982 983 engine, err := dependency.NewEngine(dependency.EngineConfig{ 984 IsFatal: model.IsFatal, 985 WorstError: model.WorstError, 986 Filter: model.IgnoreErrRemoved, 987 ErrorDelay: 3 * time.Second, 988 BounceDelay: 10 * time.Millisecond, 989 }) 990 if err != nil { 991 return nil, errors.Trace(err) 992 } 993 994 manifolds := modelManifolds(model.ManifoldsConfig{ 995 Agent: modelAgent, 996 AgentConfigChanged: a.configChangedVal, 997 Clock: clock.WallClock, 998 RunFlagDuration: time.Minute, 999 CharmRevisionUpdateInterval: 24 * time.Hour, 1000 EntityStatusHistoryCount: 100, 1001 EntityStatusHistoryInterval: 5 * time.Minute, 1002 SpacesImportedGate: a.discoverSpacesComplete, 1003 }) 1004 if err := dependency.Install(engine, manifolds); err != nil { 1005 if err := worker.Stop(engine); err != nil { 1006 logger.Errorf("while stopping engine with bad manifolds: %v", err) 1007 } 1008 return nil, errors.Trace(err) 1009 } 1010 return engine, nil 1011 } 1012 1013 // stateWorkerDialOpts is a mongo.DialOpts suitable 1014 // for use by StateWorker to dial mongo. 1015 // 1016 // This must be overridden in tests, as it assumes 1017 // journaling is enabled. 1018 var stateWorkerDialOpts mongo.DialOpts 1019 1020 func (a *MachineAgent) apiserverWorkerStarter( 1021 stateOpener func() (*state.State, error), certChanged chan params.StateServingInfo, 1022 ) func() (worker.Worker, error) { 1023 return func() (worker.Worker, error) { 1024 st, err := stateOpener() 1025 if err != nil { 1026 return nil, errors.Trace(err) 1027 } 1028 return a.newApiserverWorker(st, certChanged) 1029 } 1030 } 1031 1032 func (a *MachineAgent) newApiserverWorker(st *state.State, certChanged chan params.StateServingInfo) (worker.Worker, error) { 1033 agentConfig := a.CurrentConfig() 1034 // If the configuration does not have the required information, 1035 // it is currently not a recoverable error, so we kill the whole 1036 // agent, potentially enabling human intervention to fix 1037 // the agent's configuration file. 1038 info, ok := agentConfig.StateServingInfo() 1039 if !ok { 1040 return nil, &cmdutil.FatalError{"StateServingInfo not available and we need it"} 1041 } 1042 cert := []byte(info.Cert) 1043 key := []byte(info.PrivateKey) 1044 1045 if len(cert) == 0 || len(key) == 0 { 1046 return nil, &cmdutil.FatalError{"configuration does not have controller cert/key"} 1047 } 1048 tag := agentConfig.Tag() 1049 dataDir := agentConfig.DataDir() 1050 logDir := agentConfig.LogDir() 1051 1052 endpoint := net.JoinHostPort("", strconv.Itoa(info.APIPort)) 1053 listener, err := net.Listen("tcp", endpoint) 1054 if err != nil { 1055 return nil, err 1056 } 1057 w, err := apiserver.NewServer(st, listener, apiserver.ServerConfig{ 1058 Cert: cert, 1059 Key: key, 1060 Tag: tag, 1061 DataDir: dataDir, 1062 LogDir: logDir, 1063 Validator: a.limitLogins, 1064 CertChanged: certChanged, 1065 }) 1066 if err != nil { 1067 return nil, errors.Annotate(err, "cannot start api server worker") 1068 } 1069 return w, nil 1070 } 1071 1072 // limitLogins is called by the API server for each login attempt. 1073 // it returns an error if upgrades or restore are running. 1074 func (a *MachineAgent) limitLogins(req params.LoginRequest) error { 1075 if err := a.limitLoginsDuringRestore(req); err != nil { 1076 return err 1077 } 1078 if err := a.limitLoginsDuringUpgrade(req); err != nil { 1079 return err 1080 } 1081 return a.limitLoginsDuringMongoUpgrade(req) 1082 } 1083 1084 func (a *MachineAgent) limitLoginsDuringMongoUpgrade(req params.LoginRequest) error { 1085 // If upgrade is running we will not be able to lock AgentConfigWriter 1086 // and it also means we are not upgrading mongo. 1087 if a.isUpgradeRunning() { 1088 return nil 1089 } 1090 cfg := a.AgentConfigWriter.CurrentConfig() 1091 ver := cfg.MongoVersion() 1092 if ver == mongo.MongoUpgrade { 1093 return errors.New("Upgrading Mongo") 1094 } 1095 return nil 1096 } 1097 1098 // limitLoginsDuringRestore will only allow logins for restore related purposes 1099 // while the different steps of restore are running. 1100 func (a *MachineAgent) limitLoginsDuringRestore(req params.LoginRequest) error { 1101 var err error 1102 switch { 1103 case a.IsRestoreRunning(): 1104 err = apiserver.RestoreInProgressError 1105 case a.IsRestorePreparing(): 1106 err = apiserver.AboutToRestoreError 1107 } 1108 if err != nil { 1109 authTag, parseErr := names.ParseTag(req.AuthTag) 1110 if parseErr != nil { 1111 return errors.Annotate(err, "could not parse auth tag") 1112 } 1113 switch authTag := authTag.(type) { 1114 case names.UserTag: 1115 // use a restricted API mode 1116 return err 1117 case names.MachineTag: 1118 if authTag == a.Tag() { 1119 // allow logins from the local machine 1120 return nil 1121 } 1122 } 1123 return errors.Errorf("login for %q blocked because restore is in progress", authTag) 1124 } 1125 return nil 1126 } 1127 1128 // limitLoginsDuringUpgrade is called by the API server for each login 1129 // attempt. It returns an error if upgrades are in progress unless the 1130 // login is for a user (i.e. a client) or the local machine. 1131 func (a *MachineAgent) limitLoginsDuringUpgrade(req params.LoginRequest) error { 1132 if a.isUpgradeRunning() || a.isInitialUpgradeCheckPending() { 1133 authTag, err := names.ParseTag(req.AuthTag) 1134 if err != nil { 1135 return errors.Annotate(err, "could not parse auth tag") 1136 } 1137 switch authTag := authTag.(type) { 1138 case names.UserTag: 1139 // use a restricted API mode 1140 return params.UpgradeInProgressError 1141 case names.MachineTag: 1142 if authTag == a.Tag() { 1143 // allow logins from the local machine 1144 return nil 1145 } 1146 } 1147 return errors.Errorf("login for %q blocked because %s", authTag, params.CodeUpgradeInProgress) 1148 } else { 1149 return nil // allow all logins 1150 } 1151 } 1152 1153 var stateWorkerServingConfigErr = errors.New("state worker started with no state serving info") 1154 1155 // ensureMongoServer ensures that mongo is installed and running, 1156 // and ready for opening a state connection. 1157 func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) (err error) { 1158 a.mongoInitMutex.Lock() 1159 defer a.mongoInitMutex.Unlock() 1160 if a.mongoInitialized { 1161 logger.Debugf("mongo is already initialized") 1162 return nil 1163 } 1164 defer func() { 1165 if err == nil { 1166 a.mongoInitialized = true 1167 } 1168 }() 1169 1170 mongoInstalled, err := mongo.IsServiceInstalled() 1171 if err != nil { 1172 return errors.Annotate(err, "error while checking if mongodb service is installed") 1173 } 1174 1175 if !mongoInstalled { 1176 // EnsureMongoServer installs/upgrades the init config as necessary. 1177 ensureServerParams, err := cmdutil.NewEnsureServerParams(agentConfig) 1178 if err != nil { 1179 return err 1180 } 1181 if err := cmdutil.EnsureMongoServer(ensureServerParams); err != nil { 1182 return err 1183 } 1184 } 1185 logger.Debugf("mongodb service is installed") 1186 1187 // Mongo is installed, record the version. 1188 err = a.ChangeConfig(func(config agent.ConfigSetter) error { 1189 config.SetMongoVersion(mongo.InstalledVersion()) 1190 return nil 1191 }) 1192 if err != nil { 1193 return errors.Annotate(err, "cannot set mongo version") 1194 } 1195 return nil 1196 } 1197 1198 func openState(agentConfig agent.Config, dialOpts mongo.DialOpts) (_ *state.State, _ *state.Machine, err error) { 1199 info, ok := agentConfig.MongoInfo() 1200 if !ok { 1201 return nil, nil, fmt.Errorf("no state info available") 1202 } 1203 st, err := state.Open(agentConfig.Model(), info, dialOpts, environs.NewStatePolicy()) 1204 if err != nil { 1205 return nil, nil, err 1206 } 1207 defer func() { 1208 if err != nil { 1209 st.Close() 1210 } 1211 }() 1212 m0, err := st.FindEntity(agentConfig.Tag()) 1213 if err != nil { 1214 if errors.IsNotFound(err) { 1215 err = worker.ErrTerminateAgent 1216 } 1217 return nil, nil, err 1218 } 1219 m := m0.(*state.Machine) 1220 if m.Life() == state.Dead { 1221 return nil, nil, worker.ErrTerminateAgent 1222 } 1223 // Check the machine nonce as provisioned matches the agent.Conf value. 1224 if !m.CheckProvisioned(agentConfig.Nonce()) { 1225 // The agent is running on a different machine to the one it 1226 // should be according to state. It must stop immediately. 1227 logger.Errorf("running machine %v agent on inappropriate instance", m) 1228 return nil, nil, worker.ErrTerminateAgent 1229 } 1230 return st, m, nil 1231 } 1232 1233 func getMachine(st *state.State, tag names.Tag) (*state.Machine, error) { 1234 m0, err := st.FindEntity(tag) 1235 if err != nil { 1236 return nil, err 1237 } 1238 return m0.(*state.Machine), nil 1239 } 1240 1241 // startWorkerAfterUpgrade starts a worker to run the specified child worker 1242 // but only after waiting for upgrades to complete. 1243 func (a *MachineAgent) startWorkerAfterUpgrade(runner worker.Runner, name string, start func() (worker.Worker, error)) { 1244 runner.StartWorker(name, func() (worker.Worker, error) { 1245 return a.upgradeWaiterWorker(name, start), nil 1246 }) 1247 } 1248 1249 // upgradeWaiterWorker runs the specified worker after upgrades have completed. 1250 func (a *MachineAgent) upgradeWaiterWorker(name string, start func() (worker.Worker, error)) worker.Worker { 1251 return worker.NewSimpleWorker(func(stop <-chan struct{}) error { 1252 // Wait for the agent upgrade and upgrade steps to complete (or for us to be stopped). 1253 for _, ch := range []<-chan struct{}{ 1254 a.upgradeComplete.Unlocked(), 1255 a.initialUpgradeCheckComplete.Unlocked(), 1256 } { 1257 select { 1258 case <-stop: 1259 return nil 1260 case <-ch: 1261 } 1262 } 1263 logger.Debugf("upgrades done, starting worker %q", name) 1264 1265 // Upgrades are done, start the worker. 1266 worker, err := start() 1267 if err != nil { 1268 return err 1269 } 1270 // Wait for worker to finish or for us to be stopped. 1271 waitCh := make(chan error) 1272 go func() { 1273 waitCh <- worker.Wait() 1274 }() 1275 select { 1276 case err := <-waitCh: 1277 logger.Debugf("worker %q exited with %v", name, err) 1278 return err 1279 case <-stop: 1280 logger.Debugf("stopping so killing worker %q", name) 1281 worker.Kill() 1282 } 1283 return <-waitCh // Ensure worker has stopped before returning. 1284 }) 1285 } 1286 1287 // WorkersStarted returns a channel that's closed once all top level workers 1288 // have been started. This is provided for testing purposes. 1289 func (a *MachineAgent) WorkersStarted() <-chan struct{} { 1290 return a.workersStarted 1291 } 1292 1293 func (a *MachineAgent) Tag() names.Tag { 1294 return names.NewMachineTag(a.machineId) 1295 } 1296 1297 func (a *MachineAgent) createJujudSymlinks(dataDir string) error { 1298 jujud := filepath.Join(tools.ToolsDir(dataDir, a.Tag().String()), jujunames.Jujud) 1299 for _, link := range []string{jujuRun, jujuDumpLogs} { 1300 err := a.createSymlink(jujud, link) 1301 if err != nil { 1302 return errors.Annotatef(err, "failed to create %s symlink", link) 1303 } 1304 } 1305 return nil 1306 } 1307 1308 func (a *MachineAgent) createSymlink(target, link string) error { 1309 fullLink := utils.EnsureBaseDir(a.rootDir, link) 1310 1311 currentTarget, err := symlink.Read(fullLink) 1312 if err != nil && !os.IsNotExist(err) { 1313 return err 1314 } else if err == nil { 1315 // Link already in place - check it. 1316 if currentTarget == target { 1317 // Link already points to the right place - nothing to do. 1318 return nil 1319 } 1320 // Link points to the wrong place - delete it. 1321 if err := os.Remove(fullLink); err != nil { 1322 return err 1323 } 1324 } 1325 1326 if err := os.MkdirAll(filepath.Dir(fullLink), os.FileMode(0755)); err != nil { 1327 return err 1328 } 1329 return symlink.New(target, fullLink) 1330 } 1331 1332 func (a *MachineAgent) removeJujudSymlinks() (errs []error) { 1333 for _, link := range []string{jujuRun, jujuDumpLogs} { 1334 err := os.Remove(utils.EnsureBaseDir(a.rootDir, link)) 1335 if err != nil && !os.IsNotExist(err) { 1336 errs = append(errs, errors.Annotatef(err, "failed to remove %s symlink", link)) 1337 } 1338 } 1339 return 1340 } 1341 1342 func (a *MachineAgent) uninstallAgent() error { 1343 // We should only uninstall if the uninstall file is present. 1344 if !agent.CanUninstall(a) { 1345 logger.Infof("ignoring uninstall request") 1346 return nil 1347 } 1348 logger.Infof("uninstalling agent") 1349 1350 agentConfig := a.CurrentConfig() 1351 var errs []error 1352 agentServiceName := agentConfig.Value(agent.AgentServiceName) 1353 if agentServiceName == "" { 1354 // For backwards compatibility, handle lack of AgentServiceName. 1355 agentServiceName = os.Getenv("UPSTART_JOB") 1356 } 1357 1358 if agentServiceName != "" { 1359 svc, err := service.DiscoverService(agentServiceName, common.Conf{}) 1360 if err != nil { 1361 errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err)) 1362 } else if err := svc.Remove(); err != nil { 1363 errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err)) 1364 } 1365 } 1366 1367 errs = append(errs, a.removeJujudSymlinks()...) 1368 1369 // TODO(fwereade): surely this shouldn't be happening here? Once we're 1370 // at this point we should expect to be killed in short order; if this 1371 // work is remotely important we should be blocking machine death on 1372 // its completion. 1373 insideContainer := container.RunningInContainer() 1374 if insideContainer { 1375 // We're running inside LXC, so loop devices may leak. Detach 1376 // any loop devices that are backed by files on this machine. 1377 // 1378 // It is necessary to do this here as well as in container/lxc, 1379 // as container/lxc needs to check in the container's rootfs 1380 // to see if the loop device is attached to the container; that 1381 // will fail if the data-dir is removed first. 1382 if err := a.loopDeviceManager.DetachLoopDevices("/", agentConfig.DataDir()); err != nil { 1383 errs = append(errs, err) 1384 } 1385 } 1386 1387 if err := mongo.RemoveService(); err != nil { 1388 errs = append(errs, errors.Annotate(err, "cannot stop/remove mongo service")) 1389 } 1390 if err := os.RemoveAll(agentConfig.DataDir()); err != nil { 1391 errs = append(errs, err) 1392 } 1393 if len(errs) == 0 { 1394 return nil 1395 } 1396 return fmt.Errorf("uninstall failed: %v", errs) 1397 } 1398 1399 func newConnRunner(conns ...cmdutil.Pinger) worker.Runner { 1400 return worker.NewRunner(cmdutil.ConnectionIsFatal(logger, conns...), cmdutil.MoreImportant, worker.RestartDelay) 1401 } 1402 1403 type MongoSessioner interface { 1404 MongoSession() *mgo.Session 1405 } 1406 1407 func newSingularStateRunner(runner worker.Runner, st MongoSessioner, m *state.Machine) (worker.Runner, error) { 1408 singularStateConn := singularStateConn{st.MongoSession(), m} 1409 singularRunner, err := newSingularRunner(runner, singularStateConn) 1410 if err != nil { 1411 return nil, errors.Annotate(err, "cannot make singular State Runner") 1412 } 1413 return singularRunner, err 1414 } 1415 1416 // singularStateConn implements singular.Conn on 1417 // top of a State connection. 1418 type singularStateConn struct { 1419 session *mgo.Session 1420 machine *state.Machine 1421 } 1422 1423 func (c singularStateConn) IsMaster() (bool, error) { 1424 return mongo.IsMaster(c.session, c.machine) 1425 } 1426 1427 func (c singularStateConn) Ping() error { 1428 return c.session.Ping() 1429 } 1430 1431 func metricAPI(st api.Connection) (metricsmanager.MetricsManagerClient, error) { 1432 client, err := metricsmanager.NewClient(st) 1433 if err != nil { 1434 return nil, errors.Trace(err) 1435 } 1436 return client, nil 1437 } 1438 1439 // newDeployContext gives the tests the opportunity to create a deployer.Context 1440 // that can be used for testing so as to avoid (1) deploying units to the system 1441 // running the tests and (2) get access to the *State used internally, so that 1442 // tests can be run without waiting for the 5s watcher refresh time to which we would 1443 // otherwise be restricted. 1444 var newDeployContext = func(st *apideployer.State, agentConfig agent.Config) deployer.Context { 1445 return deployer.NewSimpleContext(agentConfig, st) 1446 }