// Source: github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/uniter/uniter.go

// Copyright 2012-2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package uniter

import (
	"fmt"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names"
	"github.com/juju/utils/exec"
	"github.com/juju/utils/fslock"
	corecharm "gopkg.in/juju/charm.v5"
	"launchpad.net/tomb"

	"github.com/juju/juju/api/uniter"
	"github.com/juju/juju/apiserver/params"
	coreleadership "github.com/juju/juju/leadership"
	"github.com/juju/juju/version"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/leadership"
	"github.com/juju/juju/worker/uniter/charm"
	"github.com/juju/juju/worker/uniter/filter"
	"github.com/juju/juju/worker/uniter/operation"
	"github.com/juju/juju/worker/uniter/runner"
	"github.com/juju/juju/worker/uniter/runner/jujuc"
	"github.com/juju/juju/worker/uniter/storage"
)

// logger is the package-level logger, registered under the name
// "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")

// leadershipGuarantee defines the period of time for which a successful call
// to the is-leader hook tool guarantees continued leadership. It is the
// duration handed to leadership.NewTrackerWorker when the uniter's loop
// starts its leadership tracker.
var leadershipGuarantee = 30 * time.Second

// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails. Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted receives the name of a hook that was executed and
	// succeeded.
	HookCompleted(hookName string)
	// HookFailed receives the name of a hook that was executed and failed.
	HookFailed(hookName string)
}

// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
type Uniter struct {
	// tomb tracks the lifetime of the goroutine started by NewUniter. st and
	// paths are set by NewUniter; f is created in loop; unit, relations and
	// storage are populated by init; cleanups holds the funcs registered via
	// addCleanup, which runCleanups calls once loop has returned.
	tomb      tomb.Tomb
	st        *uniter.State
	paths     Paths
	f         filter.Filter
	unit      *uniter.Unit
	relations Relations
	cleanups  []cleanup
	storage   *storage.Attachments

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  params.Status
	lastReportedMessage string

	// deployer, operationFactory and operationExecutor are all created by
	// init; runOperation uses the factory to build operations and the
	// executor to run them.
	deployer          *deployerProxy
	operationFactory  operation.Factory
	operationExecutor operation.Executor

	// leadershipManager is supplied via UniterParams; leadershipTracker is
	// the tracker worker that loop creates from it.
	leadershipManager coreleadership.LeadershipManager
	leadershipTracker leadership.Tracker

	// hookLock is the machine-level execution lock supplied via UniterParams;
	// runListener is the juju-run listener opened by init.
	hookLock    *fslock.Lock
	runListener *RunListener

	// NOTE(review): presumably these record whether the
	// leader-settings-changed and config-changed hooks have already run;
	// confirm against the mode funcs that use them.
	ranLeaderSettingsChanged bool
	ranConfigChanged         bool

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// metricsTimerChooser is a struct that allows metrics to switch between
	// active and inactive timers.
	metricsTimerChooser *timerChooser

	// collectMetricsAt defines a function that will be used to generate signals
	// for the collect-metrics hook.
	collectMetricsAt TimedSignal

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt TimedSignal
}

// UniterParams hold all the necessary parameters for a new Uniter.
type UniterParams struct {
	// St, LeadershipManager, HookLock, MetricsTimerChooser and
	// UpdateStatusSignal are copied onto the new Uniter by NewUniter.
	// DataDir and UnitTag are passed to NewPaths to derive the uniter's
	// paths, and UnitTag is also the unit that the uniter's loop runs for.
	//
	// NOTE(review): NewUniter evaluates MetricsTimerChooser.inactive, so a
	// nil MetricsTimerChooser is presumably unsafe -- confirm.
	St                  *uniter.State
	UnitTag             names.UnitTag
	LeadershipManager   coreleadership.LeadershipManager
	DataDir             string
	HookLock            *fslock.Lock
	MetricsTimerChooser *timerChooser
	UpdateStatusSignal  TimedSignal
}

// NewUniter creates a new Uniter which will install, run, and upgrade
// a charm on behalf of the unit with the given unitTag, by executing
// hooks and operations provoked by changes in st.
func NewUniter(uniterParams *UniterParams) *Uniter {
	u := &Uniter{
		st:                  uniterParams.St,
		paths:               NewPaths(uniterParams.DataDir, uniterParams.UnitTag),
		hookLock:            uniterParams.HookLock,
		leadershipManager:   uniterParams.LeadershipManager,
		metricsTimerChooser: uniterParams.MetricsTimerChooser,
		// Metrics collection starts on the chooser's inactive timer;
		// initializeMetricsCollector replaces it later.
		collectMetricsAt: uniterParams.MetricsTimerChooser.inactive,
		updateStatusAt:   uniterParams.UpdateStatusSignal,
	}
	// The uniter starts running immediately. Whatever loop returns becomes the
	// tomb's kill reason; the deferred calls then run in reverse order, so the
	// registered cleanups execute before the tomb is marked done.
	go func() {
		defer u.tomb.Done()
		defer u.runCleanups()
		u.tomb.Kill(u.loop(uniterParams.UnitTag))
	}()
	return u
}

// cleanup is a func that releases a resource acquired while the uniter was
// starting up; its error, if any, is reported to the uniter's tomb.
type cleanup func() error

// addCleanup registers cleanup to be called by runCleanups.
func (u *Uniter) addCleanup(cleanup cleanup) {
	u.cleanups = append(u.cleanups, cleanup)
}

// runCleanups calls every registered cleanup, in the order in which they were
// added, and passes each result to the tomb's Kill.
func (u *Uniter) runCleanups() {
	for _, cleanup := range u.cleanups {
		u.tomb.Kill(cleanup())
	}
}

// loop is the body of the uniter's goroutine. It starts the leadership
// tracker, initializes the uniter via init, starts the event filter, and then
// runs mode funcs until one of them yields an error that is not translated
// into a follow-up mode. The returned error is never nil: the mode loop only
// exits once err is set.
func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
	// Start tracking leadership state.
	// TODO(fwereade): ideally, this wouldn't be created here; as a worker it's
	// clearly better off being managed by a Runner. However, we haven't come up
	// with a clean way to reference one (lineage of a...) worker from another,
	// so for now the tracker is accessible only to its unit.
	leadershipTracker := leadership.NewTrackerWorker(
		unitTag, u.leadershipManager, leadershipGuarantee,
	)
	u.addCleanup(func() error {
		return worker.Stop(leadershipTracker)
	})
	u.leadershipTracker = leadershipTracker

	if err := u.init(unitTag); err != nil {
		// ErrTerminateAgent must reach the caller unwrapped; anything else
		// is reported with the unit tag for context.
		if err == worker.ErrTerminateAgent {
			return err
		}
		return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err)
	}
	logger.Infof("unit %q started", u.unit)

	// Start filtering state change events for consumption by modes.
	u.f, err = filter.NewFilter(u.st, unitTag)
	if err != nil {
		return err
	}
	u.addCleanup(u.f.Stop)

	// Stop the uniter if either of these components fails.
	go func() { u.tomb.Kill(leadershipTracker.Wait()) }()
	go func() { u.tomb.Kill(u.f.Wait()) }()

	// Start handling leader settings events, or not, as appropriate.
	u.f.WantLeaderSettingsEvents(!u.operationState().Leader)

	// Run modes until we encounter an error.
	mode := ModeContinue
	for err == nil {
		select {
		case <-u.tomb.Dying():
			err = tomb.ErrDying
		default:
			mode, err = mode(u)
			// Translate the underlying cause of the mode's error: some
			// causes end the loop with a specific error, others are
			// absorbed by switching to a dedicated mode.
			switch cause := errors.Cause(err); cause {
			case operation.ErrNeedsReboot:
				err = worker.ErrRebootMachine
			case tomb.ErrDying, worker.ErrTerminateAgent:
				// Strip any annotation so callers see the bare sentinel.
				err = cause
			case operation.ErrHookFailed:
				mode, err = ModeHookError, nil
			default:
				charmURL, ok := operation.DeployConflictCharmURL(cause)
				if ok {
					mode, err = ModeConflicted(charmURL), nil
				}
			}
		}
	}
	logger.Infof("unit %q shutting down: %s", u.unit, err)
	return err
}

// setupLocks breaks a stale hook lock left behind by an earlier run of this
// unit's agent. The lock is only broken when it is currently held, carries a
// non-empty message, and the text before the first ":" in that message equals
// this unit's name. In every other case the lock is left alone and nil is
// returned; an error is returned only if breaking the lock fails.
func (u *Uniter) setupLocks() (err error) {
	if message := u.hookLock.Message(); u.hookLock.IsLocked() && message != "" {
		// Look to see if it was us that held the lock before. If it was, we
		// should be safe enough to break it, as it is likely that we died
		// before unlocking, and have been restarted by the init system.
		parts := strings.SplitN(message, ":", 2)
		if len(parts) > 1 && parts[0] == u.unit.Name() {
			if err := u.hookLock.BreakLock(); err != nil {
				return err
			}
		}
	}
	return nil
}

// init populates the uniter's collaborators for the unit identified by
// unitTag. In order, it: fetches the unit (returning worker.ErrTerminateAgent
// if it is already Dead), breaks any stale hook lock, ensures the jujuc
// symlinks and the relations state directory exist, and creates the
// relations, storage attachments, deployer, runner factory, operation factory
// and operation executor. Finally it opens the juju-run listener and, except
// on Windows, makes its socket accessible to all users.
//
// The storage attachments and the run listener are registered for cleanup as
// soon as they have been created.
func (u *Uniter) init(unitTag names.UnitTag) (err error) {
	u.unit, err = u.st.Unit(unitTag)
	if err != nil {
		return err
	}
	if u.unit.Life() == params.Dead {
		// If we started up already dead, we should not progress further. If we
		// become Dead immediately after starting up, we may well complete any
		// operations in progress before detecting it; but that race is fundamental
		// and inescapable, whereas this one is not.
		return worker.ErrTerminateAgent
	}
	if err = u.setupLocks(); err != nil {
		return err
	}
	if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil {
		return err
	}
	if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil {
		return errors.Trace(err)
	}
	relations, err := newRelations(u.st, unitTag, u.paths, u.tomb.Dying())
	if err != nil {
		return errors.Annotatef(err, "cannot create relations")
	}
	u.relations = relations
	storageAttachments, err := storage.NewAttachments(
		u.st, unitTag, u.paths.State.StorageDir, u.tomb.Dying(),
	)
	if err != nil {
		return errors.Annotatef(err, "cannot create storage hook source")
	}
	u.storage = storageAttachments
	u.addCleanup(storageAttachments.Stop)

	deployer, err := charm.NewDeployer(
		u.paths.State.CharmDir,
		u.paths.State.DeployerDir,
		charm.NewBundlesDir(u.paths.State.BundlesDir),
	)
	if err != nil {
		return errors.Annotatef(err, "cannot create deployer")
	}
	u.deployer = &deployerProxy{deployer}
	// The runner factory needs the leadership tracker, which loop sets on
	// the uniter before calling init.
	runnerFactory, err := runner.NewFactory(
		u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths,
	)
	if err != nil {
		return err
	}
	u.operationFactory = operation.NewFactory(
		u.deployer,
		runnerFactory,
		&operationCallbacks{u},
		u.storage,
		u.tomb.Dying(),
	)

	operationExecutor, err := operation.NewExecutor(
		u.paths.State.OperationsFile, u.getServiceCharmURL, u.acquireExecutionLock,
	)
	if err != nil {
		return err
	}
	u.operationExecutor = operationExecutor

	logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket)
	u.runListener, err = NewRunListener(u, u.paths.Runtime.JujuRunSocket)
	if err != nil {
		return err
	}
	u.addCleanup(func() error {
		// TODO(fwereade): RunListener returns no error on Close. This seems wrong.
		u.runListener.Close()
		return nil
	})
	// The socket needs to have permissions 777 in order for other users to use it.
	if version.Current.OS != version.Windows {
		return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777)
	}
	return nil
}

// Kill marks the uniter's tomb as dying, with no error as the reason. It does
// not wait for the uniter to finish; use Wait or Stop for that.
func (u *Uniter) Kill() {
	u.tomb.Kill(nil)
}

// Wait returns the result of waiting on the uniter's tomb.
func (u *Uniter) Wait() error {
	return u.tomb.Wait()
}

// Stop kills the uniter's tomb with no error and then returns the result of
// Wait.
func (u *Uniter) Stop() error {
	u.tomb.Kill(nil)
	return u.Wait()
}

// Dead returns the Dead channel of the uniter's tomb.
func (u *Uniter) Dead() <-chan struct{} {
	return u.tomb.Dead()
}

// getServiceCharmURL returns the charm URL currently set on the unit's
// service, as reported by the API. The second value returned by the service's
// CharmURL call is discarded.
func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) {
	// TODO(fwereade): pretty sure there's no reason to make 2 API calls here.
	service, err := u.st.Service(u.unit.ServiceTag())
	if err != nil {
		return nil, err
	}
	charmURL, _, err := service.CharmURL()
	return charmURL, err
}

// operationState returns the operation executor's current state.
func (u *Uniter) operationState() operation.State {
	return u.operationExecutor.State()
}

// initializeMetricsCollector enables the periodic collect-metrics hook
// for charms that declare metrics. It reads the charm from the uniter's charm
// directory and lets the metrics timer chooser pick the collectMetricsAt
// signal for it; an error is returned only if the charm cannot be read.
func (u *Uniter) initializeMetricsCollector() error {
	charm, err := corecharm.ReadCharmDir(u.paths.State.CharmDir)
	if err != nil {
		return err
	}
	u.collectMetricsAt = u.metricsTimerChooser.getMetricsTimer(charm)
	return nil
}

// RunCommands executes the supplied commands in a hook context.
//
// The commands are wrapped in an operation and run via runOperation; the
// operation hands its result back through a callback. If the operation
// succeeds without ever invoking that callback, an error is returned.
//
// If the resulting error is caused by operation.ErrNeedsReboot, the uniter's
// tomb is killed with worker.ErrRebootMachine and a nil error is returned to
// the caller. Any other error is both returned and used to kill the tomb.
func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) {
	// TODO(fwereade): this is *still* all sorts of messed-up and not especially
	// goroutine-safe, but that's not what I'm fixing at the moment. We could
	// address this by:
	//  1) implementing an operation to encapsulate the relations.Update call
	//  2) (quick+dirty) mutex runOperation until we can
	//  3) (correct) feed RunCommands requests into the mode funcs (or any queue
	//     that replaces them) such that they're handled and prioritised like
	//     every other operation.
	logger.Tracef("run commands: %s", args.Commands)

	type responseInfo struct {
		response *exec.ExecResponse
		err      error
	}
	// A buffer of one lets sendResponse complete even though nothing is
	// receiving until runOperation has returned.
	responseChan := make(chan responseInfo, 1)
	sendResponse := func(response *exec.ExecResponse, err error) {
		responseChan <- responseInfo{response, err}
	}

	commandArgs := operation.CommandArgs{
		Commands:        args.Commands,
		RelationId:      args.RelationId,
		RemoteUnitName:  args.RemoteUnitName,
		ForceRemoteUnit: args.ForceRemoteUnit,
	}
	err = u.runOperation(newCommandsOp(commandArgs, sendResponse))
	if err == nil {
		// Non-blocking receive: by now the response is either already in
		// the channel or was never sent.
		select {
		case response := <-responseChan:
			results, err = response.response, response.err
		default:
			err = errors.New("command response never sent")
		}
	}
	if errors.Cause(err) == operation.ErrNeedsReboot {
		u.tomb.Kill(worker.ErrRebootMachine)
		err = nil
	}
	if err != nil {
		u.tomb.Kill(err)
	}
	return results, err
}

// runOperation uses the uniter's operation factory to run the supplied creation
// func, and then runs the resulting operation.
//
// This has a number of advantages over having mode funcs use the factory and
// executor directly:
//   * it cuts down on duplicated code in the mode funcs, making the logic easier
//     to parse
//   * it narrows the (conceptual) interface exposed to the mode funcs -- one day
//     we might even be able to use a (real) interface and maybe even approach a
//     point where we can run direct unit tests(!) on the modes themselves.
402 // * it opens a path to fixing RunCommands -- all operation creation and 403 // execution is done in a single place, and it's much easier to force those 404 // onto a single thread. 405 // * this can't be done quite yet, though, because relation changes are 406 // not yet encapsulated in operations, and that needs to happen before 407 // RunCommands will *actually* be goroutine-safe. 408 func (u *Uniter) runOperation(creator creator) error { 409 op, err := creator(u.operationFactory) 410 if err != nil { 411 return errors.Annotatef(err, "cannot create operation") 412 } 413 before := u.operationState() 414 defer func() { 415 // Check that if we lose leadership as a result of this 416 // operation, we want to start getting leader settings events, 417 // or if we gain leadership we want to stop receiving those 418 // events. 419 if after := u.operationState(); before.Leader != after.Leader { 420 u.f.WantLeaderSettingsEvents(before.Leader) 421 } 422 }() 423 return u.operationExecutor.Run(op) 424 } 425 426 // acquireExecutionLock acquires the machine-level execution lock, and 427 // returns a func that must be called to unlock it. It's used by operation.Executor 428 // when running operations that execute external code. 429 func (u *Uniter) acquireExecutionLock(message string) (func() error, error) { 430 // We want to make sure we don't block forever when locking, but take the 431 // Uniter's tomb into account. 432 checkTomb := func() error { 433 select { 434 case <-u.tomb.Dying(): 435 return tomb.ErrDying 436 default: 437 return nil 438 } 439 } 440 message = fmt.Sprintf("%s: %s", u.unit.Name(), message) 441 if err := u.hookLock.LockWithFunc(message, checkTomb); err != nil { 442 return nil, err 443 } 444 return func() error { return u.hookLock.Unlock() }, nil 445 }