// Copyright 2012-2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package uniter

import (
	"fmt"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names"
	"github.com/juju/utils/exec"
	"github.com/juju/utils/fslock"
	corecharm "gopkg.in/juju/charm.v5"
	"launchpad.net/tomb"

	"github.com/juju/juju/api/uniter"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/version"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/leadership"
	"github.com/juju/juju/worker/uniter/charm"
	"github.com/juju/juju/worker/uniter/filter"
	"github.com/juju/juju/worker/uniter/operation"
	"github.com/juju/juju/worker/uniter/runner"
	"github.com/juju/juju/worker/uniter/runner/jujuc"
	"github.com/juju/juju/worker/uniter/storage"
)

// logger is the package-level logger, obtained from loggo under the name
// "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")

// leadershipGuarantee defines the period of time for which a successful call
// to the is-leader hook tool guarantees continued leadership.
var leadershipGuarantee = 30 * time.Second

// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails. Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted receives the name of a hook. Going by the interface
	// comment it is presumably called when that hook succeeds; the call
	// site is not in this file -- confirm.
	HookCompleted(hookName string)
	// HookFailed receives the name of a hook. Going by the interface
	// comment it is presumably called when that hook fails; the call site
	// is not in this file -- confirm.
	HookFailed(hookName string)
}

// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
type Uniter struct {
	// tomb tracks the lifetime of the goroutine started by NewUniter; Kill,
	// Wait, Stop and Dead all delegate to it.
	tomb tomb.Tomb
	// st is the uniter API facade supplied as UniterParams.UniterFacade.
	st *uniter.State
	// paths is built by NewPaths from the data dir and unit tag.
	paths Paths
	// f is the event filter; it is created in loop after init succeeds.
	f filter.Filter
	// unit is fetched from st during init.
	unit *uniter.Unit
	// relations is created during init.
	relations Relations
	// cleanups are run, in the order they were added, when loop returns.
	cleanups []cleanup
	// storage holds the storage attachments created during init.
	storage *storage.Attachments

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  params.Status
	lastReportedMessage string

	// deployer, operationFactory and operationExecutor are all set up during
	// init. newOperationExecutor is the constructor supplied in UniterParams
	// that init calls to build operationExecutor.
	deployer             *deployerProxy
	operationFactory     operation.Factory
	operationExecutor    operation.Executor
	newOperationExecutor NewExecutorFunc

	// leadershipTracker is supplied in UniterParams and handed to the runner
	// context factory during init.
	leadershipTracker leadership.Tracker

	// hookLock is the lock supplied as UniterParams.MachineLock; it is taken
	// by acquireExecutionLock and inspected by setupLocks.
	hookLock *fslock.Lock
	// runListener is the juju-run listener created during init and closed by
	// a registered cleanup.
	runListener *RunListener

	// NOTE(review): these two flags are not read or written in this file;
	// presumably they record whether the corresponding hooks have already
	// run -- confirm against the mode funcs.
	ranLeaderSettingsChanged bool
	ranConfigChanged         bool

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// metricsTimerChooser is a struct that allows metrics to switch between
	// active and inactive timers.
	metricsTimerChooser *timerChooser

	// collectMetricsAt defines a function that will be used to generate signals
	// for the collect-metrics hook.
	collectMetricsAt TimedSignal

	// sendMetricsAt defines a function that will be used to generate signals
	// to send metrics to the state server.
	sendMetricsAt TimedSignal

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt TimedSignal
}

// UniterParams hold all the necessary parameters for a new Uniter.
103 type UniterParams struct { 104 UniterFacade *uniter.State 105 UnitTag names.UnitTag 106 LeadershipTracker leadership.Tracker 107 DataDir string 108 MachineLock *fslock.Lock 109 MetricsTimerChooser *timerChooser 110 UpdateStatusSignal TimedSignal 111 NewOperationExecutor NewExecutorFunc 112 } 113 114 type NewExecutorFunc func(string, func() (*corecharm.URL, error), func(string) (func() error, error)) (operation.Executor, error) 115 116 // NewUniter creates a new Uniter which will install, run, and upgrade 117 // a charm on behalf of the unit with the given unitTag, by executing 118 // hooks and operations provoked by changes in st. 119 func NewUniter(uniterParams *UniterParams) *Uniter { 120 u := &Uniter{ 121 st: uniterParams.UniterFacade, 122 paths: NewPaths(uniterParams.DataDir, uniterParams.UnitTag), 123 hookLock: uniterParams.MachineLock, 124 leadershipTracker: uniterParams.LeadershipTracker, 125 metricsTimerChooser: uniterParams.MetricsTimerChooser, 126 collectMetricsAt: uniterParams.MetricsTimerChooser.inactive, 127 sendMetricsAt: uniterParams.MetricsTimerChooser.inactive, 128 updateStatusAt: uniterParams.UpdateStatusSignal, 129 newOperationExecutor: uniterParams.NewOperationExecutor, 130 } 131 go func() { 132 defer u.tomb.Done() 133 defer u.runCleanups() 134 u.tomb.Kill(u.loop(uniterParams.UnitTag)) 135 }() 136 return u 137 } 138 139 type cleanup func() error 140 141 func (u *Uniter) addCleanup(cleanup cleanup) { 142 u.cleanups = append(u.cleanups, cleanup) 143 } 144 145 func (u *Uniter) runCleanups() { 146 for _, cleanup := range u.cleanups { 147 u.tomb.Kill(cleanup()) 148 } 149 } 150 151 func (u *Uniter) loop(unitTag names.UnitTag) (err error) { 152 if err := u.init(unitTag); err != nil { 153 if err == worker.ErrTerminateAgent { 154 return err 155 } 156 return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err) 157 } 158 logger.Infof("unit %q started", u.unit) 159 160 // Start filtering state change events for consumption by modes. 
161 u.f, err = filter.NewFilter(u.st, unitTag) 162 if err != nil { 163 return err 164 } 165 u.addCleanup(u.f.Stop) 166 167 // Stop the uniter if the filter fails. 168 go func() { u.tomb.Kill(u.f.Wait()) }() 169 170 // Start handling leader settings events, or not, as appropriate. 171 u.f.WantLeaderSettingsEvents(!u.operationState().Leader) 172 173 // Run modes until we encounter an error. 174 mode := ModeContinue 175 for err == nil { 176 select { 177 case <-u.tomb.Dying(): 178 err = tomb.ErrDying 179 default: 180 mode, err = mode(u) 181 switch cause := errors.Cause(err); cause { 182 case operation.ErrNeedsReboot: 183 err = worker.ErrRebootMachine 184 case tomb.ErrDying, worker.ErrTerminateAgent: 185 err = cause 186 case operation.ErrHookFailed: 187 mode, err = ModeHookError, nil 188 default: 189 charmURL, ok := operation.DeployConflictCharmURL(cause) 190 if ok { 191 mode, err = ModeConflicted(charmURL), nil 192 } 193 } 194 } 195 } 196 197 logger.Infof("unit %q shutting down: %s", u.unit, err) 198 return err 199 } 200 201 func (u *Uniter) setupLocks() (err error) { 202 if message := u.hookLock.Message(); u.hookLock.IsLocked() && message != "" { 203 // Look to see if it was us that held the lock before. If it was, we 204 // should be safe enough to break it, as it is likely that we died 205 // before unlocking, and have been restarted by the init system. 206 parts := strings.SplitN(message, ":", 2) 207 if len(parts) > 1 && parts[0] == u.unit.Name() { 208 if err := u.hookLock.BreakLock(); err != nil { 209 return err 210 } 211 } 212 } 213 return nil 214 } 215 216 func (u *Uniter) init(unitTag names.UnitTag) (err error) { 217 u.unit, err = u.st.Unit(unitTag) 218 if err != nil { 219 return err 220 } 221 if u.unit.Life() == params.Dead { 222 // If we started up already dead, we should not progress further. 
If we 223 // become Dead immediately after starting up, we may well complete any 224 // operations in progress before detecting it; but that race is fundamental 225 // and inescapable, whereas this one is not. 226 return worker.ErrTerminateAgent 227 } 228 if err = u.setupLocks(); err != nil { 229 return err 230 } 231 if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil { 232 return err 233 } 234 if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil { 235 return errors.Trace(err) 236 } 237 relations, err := newRelations(u.st, unitTag, u.paths, u.tomb.Dying()) 238 if err != nil { 239 return errors.Annotatef(err, "cannot create relations") 240 } 241 u.relations = relations 242 storageAttachments, err := storage.NewAttachments( 243 u.st, unitTag, u.paths.State.StorageDir, u.tomb.Dying(), 244 ) 245 if err != nil { 246 return errors.Annotatef(err, "cannot create storage hook source") 247 } 248 u.storage = storageAttachments 249 u.addCleanup(storageAttachments.Stop) 250 251 deployer, err := charm.NewDeployer( 252 u.paths.State.CharmDir, 253 u.paths.State.DeployerDir, 254 charm.NewBundlesDir(u.paths.State.BundlesDir), 255 ) 256 if err != nil { 257 return errors.Annotatef(err, "cannot create deployer") 258 } 259 u.deployer = &deployerProxy{deployer} 260 contextFactory, err := runner.NewContextFactory( 261 u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths, 262 ) 263 if err != nil { 264 return err 265 } 266 runnerFactory, err := runner.NewFactory( 267 u.st, u.paths, contextFactory, 268 ) 269 if err != nil { 270 return err 271 } 272 u.operationFactory = operation.NewFactory(operation.FactoryParams{ 273 Deployer: u.deployer, 274 RunnerFactory: runnerFactory, 275 Callbacks: &operationCallbacks{u}, 276 StorageUpdater: u.storage, 277 Abort: u.tomb.Dying(), 278 MetricSender: u.unit, 279 MetricSpoolDir: u.paths.GetMetricsSpoolDir(), 280 }) 281 282 operationExecutor, err := u.newOperationExecutor(u.paths.State.OperationsFile, 
u.getServiceCharmURL, u.acquireExecutionLock) 283 if err != nil { 284 return err 285 } 286 u.operationExecutor = operationExecutor 287 288 logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket) 289 u.runListener, err = NewRunListener(u, u.paths.Runtime.JujuRunSocket) 290 if err != nil { 291 return err 292 } 293 u.addCleanup(func() error { 294 // TODO(fwereade): RunListener returns no error on Close. This seems wrong. 295 u.runListener.Close() 296 return nil 297 }) 298 // The socket needs to have permissions 777 in order for other users to use it. 299 if version.Current.OS != version.Windows { 300 return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777) 301 } 302 return nil 303 } 304 305 func (u *Uniter) Kill() { 306 u.tomb.Kill(nil) 307 } 308 309 func (u *Uniter) Wait() error { 310 return u.tomb.Wait() 311 } 312 313 func (u *Uniter) Stop() error { 314 u.tomb.Kill(nil) 315 return u.Wait() 316 } 317 318 func (u *Uniter) Dead() <-chan struct{} { 319 return u.tomb.Dead() 320 } 321 322 func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) { 323 // TODO(fwereade): pretty sure there's no reason to make 2 API calls here. 324 service, err := u.st.Service(u.unit.ServiceTag()) 325 if err != nil { 326 return nil, err 327 } 328 charmURL, _, err := service.CharmURL() 329 return charmURL, err 330 } 331 332 func (u *Uniter) operationState() operation.State { 333 return u.operationExecutor.State() 334 } 335 336 // initializeMetricsTimers enables the periodic collect-metrics hook 337 // and periodic sending of collected metrics for charms that declare metrics. 
338 func (u *Uniter) initializeMetricsTimers() error { 339 charm, err := corecharm.ReadCharmDir(u.paths.State.CharmDir) 340 if err != nil { 341 return err 342 } 343 u.collectMetricsAt = u.metricsTimerChooser.getCollectMetricsTimer(charm) 344 u.sendMetricsAt = u.metricsTimerChooser.getSendMetricsTimer(charm) 345 return nil 346 } 347 348 // RunCommands executes the supplied commands in a hook context. 349 func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) { 350 // TODO(fwereade): this is *still* all sorts of messed-up and not especially 351 // goroutine-safe, but that's not what I'm fixing at the moment. We could 352 // address this by: 353 // 1) implementing an operation to encapsulate the relations.Update call 354 // 2) (quick+dirty) mutex runOperation until we can 355 // 3) (correct) feed RunCommands requests into the mode funcs (or any queue 356 // that replaces them) such that they're handled and prioritised like 357 // every other operation. 358 logger.Tracef("run commands: %s", args.Commands) 359 360 type responseInfo struct { 361 response *exec.ExecResponse 362 err error 363 } 364 responseChan := make(chan responseInfo, 1) 365 sendResponse := func(response *exec.ExecResponse, err error) { 366 responseChan <- responseInfo{response, err} 367 } 368 369 commandArgs := operation.CommandArgs{ 370 Commands: args.Commands, 371 RelationId: args.RelationId, 372 RemoteUnitName: args.RemoteUnitName, 373 ForceRemoteUnit: args.ForceRemoteUnit, 374 } 375 err = u.runOperation(newCommandsOp(commandArgs, sendResponse)) 376 if err == nil { 377 select { 378 case response := <-responseChan: 379 results, err = response.response, response.err 380 default: 381 err = errors.New("command response never sent") 382 } 383 } 384 if errors.Cause(err) == operation.ErrNeedsReboot { 385 u.tomb.Kill(worker.ErrRebootMachine) 386 err = nil 387 } 388 if err != nil { 389 u.tomb.Kill(err) 390 } 391 return results, err 392 } 393 394 // runOperation uses the 
uniter's operation factory to run the supplied creation 395 // func, and then runs the resulting operation. 396 // 397 // This has a number of advantages over having mode funcs use the factory and 398 // executor directly: 399 // * it cuts down on duplicated code in the mode funcs, making the logic easier 400 // to parse 401 // * it narrows the (conceptual) interface exposed to the mode funcs -- one day 402 // we might even be able to use a (real) interface and maybe even approach a 403 // point where we can run direct unit tests(!) on the modes themselves. 404 // * it opens a path to fixing RunCommands -- all operation creation and 405 // execution is done in a single place, and it's much easier to force those 406 // onto a single thread. 407 // * this can't be done quite yet, though, because relation changes are 408 // not yet encapsulated in operations, and that needs to happen before 409 // RunCommands will *actually* be goroutine-safe. 410 func (u *Uniter) runOperation(creator creator) (err error) { 411 errorMessage := "creating operation to run" 412 defer func() { 413 reportAgentError(u, errorMessage, err) 414 }() 415 op, err := creator(u.operationFactory) 416 if err != nil { 417 return errors.Annotatef(err, "cannot create operation") 418 } 419 errorMessage = op.String() 420 before := u.operationState() 421 defer func() { 422 // Check that if we lose leadership as a result of this 423 // operation, we want to start getting leader settings events, 424 // or if we gain leadership we want to stop receiving those 425 // events. 426 if after := u.operationState(); before.Leader != after.Leader { 427 u.f.WantLeaderSettingsEvents(before.Leader) 428 } 429 }() 430 return u.operationExecutor.Run(op) 431 } 432 433 // acquireExecutionLock acquires the machine-level execution lock, and 434 // returns a func that must be called to unlock it. It's used by operation.Executor 435 // when running operations that execute external code. 
436 func (u *Uniter) acquireExecutionLock(message string) (func() error, error) { 437 // We want to make sure we don't block forever when locking, but take the 438 // Uniter's tomb into account. 439 checkTomb := func() error { 440 select { 441 case <-u.tomb.Dying(): 442 return tomb.ErrDying 443 default: 444 return nil 445 } 446 } 447 message = fmt.Sprintf("%s: %s", u.unit.Name(), message) 448 if err := u.hookLock.LockWithFunc(message, checkTomb); err != nil { 449 return nil, err 450 } 451 return func() error { return u.hookLock.Unlock() }, nil 452 }