github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/uniter/uniter.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/juju/errors"
    13  	"github.com/juju/loggo"
    14  	"github.com/juju/mutex"
    15  	"github.com/juju/utils"
    16  	"github.com/juju/utils/clock"
    17  	"github.com/juju/utils/exec"
    18  	corecharm "gopkg.in/juju/charm.v6-unstable"
    19  	"gopkg.in/juju/names.v2"
    20  
    21  	"github.com/juju/juju/api/uniter"
    22  	"github.com/juju/juju/apiserver/params"
    23  	"github.com/juju/juju/core/leadership"
    24  	"github.com/juju/juju/status"
    25  	"github.com/juju/juju/worker"
    26  	"github.com/juju/juju/worker/catacomb"
    27  	"github.com/juju/juju/worker/fortress"
    28  	"github.com/juju/juju/worker/uniter/actions"
    29  	"github.com/juju/juju/worker/uniter/charm"
    30  	"github.com/juju/juju/worker/uniter/hook"
    31  	uniterleadership "github.com/juju/juju/worker/uniter/leadership"
    32  	"github.com/juju/juju/worker/uniter/operation"
    33  	"github.com/juju/juju/worker/uniter/relation"
    34  	"github.com/juju/juju/worker/uniter/remotestate"
    35  	"github.com/juju/juju/worker/uniter/resolver"
    36  	"github.com/juju/juju/worker/uniter/runcommands"
    37  	"github.com/juju/juju/worker/uniter/runner"
    38  	"github.com/juju/juju/worker/uniter/runner/context"
    39  	"github.com/juju/juju/worker/uniter/runner/jujuc"
    40  	"github.com/juju/juju/worker/uniter/storage"
    41  	jujuos "github.com/juju/utils/os"
    42  )
    43  
// logger is the package-level logger, obtained from loggo under the
// name "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")
    45  
// UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails. Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted is called with the name of a hook that succeeded.
	HookCompleted(hookName string)
	// HookFailed is called with the name of a hook that failed.
	HookFailed(hookName string)
}
    53  
// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
type Uniter struct {
	// catacomb is the site the main loop is invoked on (see NewUniter) and
	// backs Kill and Wait. st, paths and clock are set by NewUniter from
	// UniterParams; unit, relations and storage are populated by init.
	catacomb  catacomb.Catacomb
	st        *uniter.State
	paths     Paths
	unit      *uniter.Unit
	relations relation.Relations
	storage   *storage.Attachments
	clock     clock.Clock

	// Cache the last reported status information
	// so we don't make unnecessary API calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  status.Status
	lastReportedMessage string

	// operationFactory and operationExecutor are built by init; the
	// executor is created by calling newOperationExecutor.
	operationFactory     operation.Factory
	operationExecutor    operation.Executor
	newOperationExecutor NewExecutorFunc

	leadershipTracker leadership.Tracker
	charmDirGuard     fortress.Guard

	// hookLockName is the name of the mutex acquired by
	// acquireExecutionLock; it comes from UniterParams.MachineLockName.
	hookLockName string

	// TODO(axw) move the runListener and run-command code outside of the
	// uniter, and introduce a separate worker. Each worker would feed
	// operations to a single, synchronized runner to execute.
	runListener    *RunListener
	commands       runcommands.Commands
	commandChannel chan string

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook.
	updateStatusAt func() <-chan time.Time

	// hookRetryStrategy represents configuration for hook retries.
	hookRetryStrategy params.RetryStrategy

	// downloader is the downloader that should be used to get the charm
	// archive.
	downloader charm.Downloader
}
   104  
// UniterParams holds all the necessary parameters for a new Uniter.
// NewUniter copies each field into the corresponding Uniter field; DataDir
// and UnitTag are combined via NewPaths, and UnitTag is also handed to the
// main loop.
type UniterParams struct {
	UniterFacade         *uniter.State
	UnitTag              names.UnitTag
	LeadershipTracker    leadership.Tracker
	DataDir              string
	Downloader           charm.Downloader
	MachineLockName      string
	CharmDirGuard        fortress.Guard
	UpdateStatusSignal   func() <-chan time.Time
	HookRetryStrategy    params.RetryStrategy
	NewOperationExecutor NewExecutorFunc
	Clock                clock.Clock
	// TODO (mattyw, wallyworld, fwereade) Having the observer here makes this approach look a bit more legitimate, but it isn't.
	// The observer is only a stopgap to be used in tests. A better approach would be to have the uniter tests start hooks
	// that write to files, and have the tests watch the output to know that hooks have finished.
	Observer UniterExecutionObserver
}
   123  
// NewExecutorFunc constructs an operation.Executor. The uniter's init calls
// it with the path of the operations state file, a func that returns the
// application's charm URL, and a func that acquires the execution lock.
type NewExecutorFunc func(string, func() (*corecharm.URL, error), func() (mutex.Releaser, error)) (operation.Executor, error)
   125  
   126  // NewUniter creates a new Uniter which will install, run, and upgrade
   127  // a charm on behalf of the unit with the given unitTag, by executing
   128  // hooks and operations provoked by changes in st.
   129  func NewUniter(uniterParams *UniterParams) (*Uniter, error) {
   130  	u := &Uniter{
   131  		st:                   uniterParams.UniterFacade,
   132  		paths:                NewPaths(uniterParams.DataDir, uniterParams.UnitTag),
   133  		hookLockName:         uniterParams.MachineLockName,
   134  		leadershipTracker:    uniterParams.LeadershipTracker,
   135  		charmDirGuard:        uniterParams.CharmDirGuard,
   136  		updateStatusAt:       uniterParams.UpdateStatusSignal,
   137  		hookRetryStrategy:    uniterParams.HookRetryStrategy,
   138  		newOperationExecutor: uniterParams.NewOperationExecutor,
   139  		observer:             uniterParams.Observer,
   140  		clock:                uniterParams.Clock,
   141  		downloader:           uniterParams.Downloader,
   142  	}
   143  	err := catacomb.Invoke(catacomb.Plan{
   144  		Site: &u.catacomb,
   145  		Work: func() error {
   146  			return u.loop(uniterParams.UnitTag)
   147  		},
   148  	})
   149  	return u, errors.Trace(err)
   150  }
   151  
// loop is the uniter's main loop, run as the catacomb's Work func.
//
// It initialises the uniter via init, resumes a pending charm install if
// the persisted operation state says one was in progress, and then
// repeatedly (re)starts the remote state watcher and drives resolver.Loop
// until an error other than resolver.ErrRestart ends the run. The returned
// error is whatever terminated the loop; worker.ErrTerminateAgent from init
// is returned unannotated.
func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
	// Note: this err shadows the named result for the duration of the if.
	if err := u.init(unitTag); err != nil {
		if err == worker.ErrTerminateAgent {
			return err
		}
		return errors.Annotatef(err, "failed to initialize uniter for %q", unitTag)
	}
	logger.Infof("unit %q started", u.unit)

	// Install is a special case, as it must run before there
	// is any remote state, and before the remote state watcher
	// is started.
	var charmURL *corecharm.URL
	var charmModifiedVersion int
	opState := u.operationExecutor.State()
	if opState.Kind == operation.Install {
		logger.Infof("resuming charm install")
		op, err := u.operationFactory.NewInstall(opState.CharmURL)
		if err != nil {
			return errors.Trace(err)
		}
		if err := u.operationExecutor.Run(op); err != nil {
			return errors.Trace(err)
		}
		charmURL = opState.CharmURL
	} else {
		// Not installing: seed the local state from what the API reports
		// for the unit and its application.
		curl, err := u.unit.CharmURL()
		if err != nil {
			return errors.Trace(err)
		}
		charmURL = curl
		svc, err := u.unit.Application()
		if err != nil {
			return errors.Trace(err)
		}
		charmModifiedVersion, err = svc.CharmModifiedVersion()
		if err != nil {
			return errors.Trace(err)
		}
	}

	var (
		watcher   *remotestate.RemoteStateWatcher
		watcherMu sync.Mutex
	)

	logger.Infof("hooks are retried %v", u.hookRetryStrategy.ShouldRetry)
	// Buffered with capacity 1 so the timer callback below can signal
	// without blocking.
	retryHookChan := make(chan struct{}, 1)
	// TODO(katco): 2016-08-09: This type is deprecated: lp:1611427
	retryHookTimer := utils.NewBackoffTimer(utils.BackoffTimerConfig{
		Min:    u.hookRetryStrategy.MinRetryTime,
		Max:    u.hookRetryStrategy.MaxRetryTime,
		Jitter: u.hookRetryStrategy.JitterRetryTime,
		Factor: u.hookRetryStrategy.RetryTimeFactor,
		Func: func() {
			// Don't try to send on the channel if it's already full.
			// This can happen if the timer fires off before the event is consumed
			// by the resolver loop.
			select {
			case retryHookChan <- struct{}{}:
			default:
			}
		},
		Clock: u.clock,
	})
	defer func() {
		// Whenever we exit the uniter we want to stop a potentially
		// running timer so it doesn't trigger for nothing.
		retryHookTimer.Reset()
	}()

	// restartWatcher stops any existing remote state watcher, creates a
	// fresh one and registers it with the catacomb.
	restartWatcher := func() error {
		watcherMu.Lock()
		defer watcherMu.Unlock()

		if watcher != nil {
			// watcher added to catacomb, will kill uniter if there's an error.
			worker.Stop(watcher)
		}
		var err error
		watcher, err = remotestate.NewWatcher(
			remotestate.WatcherConfig{
				State:               remotestate.NewAPIState(u.st),
				LeadershipTracker:   u.leadershipTracker,
				UnitTag:             unitTag,
				UpdateStatusChannel: u.updateStatusAt,
				CommandChannel:      u.commandChannel,
				RetryHookChannel:    retryHookChan,
			})
		if err != nil {
			return errors.Trace(err)
		}
		if err := u.catacomb.Add(watcher); err != nil {
			return errors.Trace(err)
		}
		return nil
	}

	// onIdle reports the agent as idle, but only when the executor is in
	// the Continue state.
	onIdle := func() error {
		opState := u.operationExecutor.State()
		if opState.Kind != operation.Continue {
			// We should only set idle status if we're in
			// the "Continue" state, which indicates that
			// there is nothing to do and we're not in an
			// error state.
			return nil
		}
		return setAgentStatus(u, status.Idle, "", nil)
	}

	// clearResolved clears the resolved flag on the unit via the API and
	// then on the current watcher.
	clearResolved := func() error {
		if err := u.unit.ClearResolved(); err != nil {
			return errors.Trace(err)
		}
		watcher.ClearResolvedMode()
		return nil
	}

	for {
		// This assignment also resets err after an ErrRestart from the
		// previous iteration, so the inner loop below can run again.
		if err = restartWatcher(); err != nil {
			err = errors.Annotate(err, "(re)starting watcher")
			break
		}

		uniterResolver := NewUniterResolver(ResolverConfig{
			ClearResolved:       clearResolved,
			ReportHookError:     u.reportHookError,
			ShouldRetryHooks:    u.hookRetryStrategy.ShouldRetry,
			StartRetryHookTimer: retryHookTimer.Start,
			StopRetryHookTimer:  retryHookTimer.Reset,
			Actions:             actions.NewResolver(),
			Leadership:          uniterleadership.NewResolver(),
			Relations:           relation.NewRelationsResolver(u.relations),
			Storage:             storage.NewResolver(u.storage),
			Commands: runcommands.NewCommandsResolver(
				u.commands, watcher.CommandCompleted,
			),
		})

		// We should not do anything until there has been a change
		// to the remote state. The watcher will trigger at least
		// once initially.
		select {
		case <-u.catacomb.Dying():
			return u.catacomb.ErrDying()
		case <-watcher.RemoteStateChanged():
		}

		localState := resolver.LocalState{
			CharmURL:             charmURL,
			CharmModifiedVersion: charmModifiedVersion,
		}
		// Keep running the resolver loop for as long as each pass ends
		// without an error (or with one that is mapped back to nil below).
		for err == nil {
			err = resolver.Loop(resolver.LoopConfig{
				Resolver:      uniterResolver,
				Watcher:       watcher,
				Executor:      u.operationExecutor,
				Factory:       u.operationFactory,
				Abort:         u.catacomb.Dying(),
				OnIdle:        onIdle,
				CharmDirGuard: u.charmDirGuard,
			}, &localState)
			switch cause := errors.Cause(err); cause {
			case nil:
				// Loop back around.
			case resolver.ErrLoopAborted:
				err = u.catacomb.ErrDying()
			case operation.ErrNeedsReboot:
				err = worker.ErrRebootMachine
			case operation.ErrHookFailed:
				// Loop back around. The resolver can tell that it is in
				// an error state by inspecting the operation state.
				err = nil
			case resolver.ErrTerminate:
				err = u.terminate()
			case resolver.ErrRestart:
				// make sure we update the two values used above in
				// creating LocalState.
				charmURL = localState.CharmURL
				charmModifiedVersion = localState.CharmModifiedVersion
				// leave err assigned, causing loop to break
			default:
				// We need to set conflicted from here, because error
				// handling is outside of the resolver's control.
				if operation.IsDeployConflictError(cause) {
					localState.Conflicted = true
					err = setAgentStatus(u, status.Error, "upgrade failed", nil)
				} else {
					reportAgentError(u, "resolver loop error", err)
				}
			}
		}

		// Only a restart request sends us back to the top to rebuild the
		// watcher and resolver; any other error ends the uniter.
		if errors.Cause(err) != resolver.ErrRestart {
			break
		}
	}

	logger.Infof("unit %q shutting down: %s", u.unit, err)
	return err
}
   353  
   354  func (u *Uniter) terminate() error {
   355  	unitWatcher, err := u.unit.Watch()
   356  	if err != nil {
   357  		return errors.Trace(err)
   358  	}
   359  	if err := u.catacomb.Add(unitWatcher); err != nil {
   360  		return errors.Trace(err)
   361  	}
   362  	for {
   363  		select {
   364  		case <-u.catacomb.Dying():
   365  			return u.catacomb.ErrDying()
   366  		case _, ok := <-unitWatcher.Changes():
   367  			if !ok {
   368  				return errors.New("unit watcher closed")
   369  			}
   370  			if err := u.unit.Refresh(); err != nil {
   371  				return errors.Trace(err)
   372  			}
   373  			if hasSubs, err := u.unit.HasSubordinates(); err != nil {
   374  				return errors.Trace(err)
   375  			} else if hasSubs {
   376  				continue
   377  			}
   378  			// The unit is known to be Dying; so if it didn't have subordinates
   379  			// just above, it can't acquire new ones before this call.
   380  			if err := u.unit.EnsureDead(); err != nil {
   381  				return errors.Trace(err)
   382  			}
   383  			return worker.ErrTerminateAgent
   384  		}
   385  	}
   386  }
   387  
   388  func (u *Uniter) init(unitTag names.UnitTag) (err error) {
   389  	u.unit, err = u.st.Unit(unitTag)
   390  	if err != nil {
   391  		return err
   392  	}
   393  	if u.unit.Life() == params.Dead {
   394  		// If we started up already dead, we should not progress further. If we
   395  		// become Dead immediately after starting up, we may well complete any
   396  		// operations in progress before detecting it; but that race is fundamental
   397  		// and inescapable, whereas this one is not.
   398  		return worker.ErrTerminateAgent
   399  	}
   400  	// If initialising for the first time after deploying, update the status.
   401  	currentStatus, err := u.unit.UnitStatus()
   402  	if err != nil {
   403  		return err
   404  	}
   405  	// TODO(fwereade/wallyworld): we should have an explicit place in the model
   406  	// to tell us when we've hit this point, instead of piggybacking on top of
   407  	// status and/or status history.
   408  	// If the previous status was waiting for machine, we transition to the next step.
   409  	if currentStatus.Status == string(status.Waiting) &&
   410  		(currentStatus.Info == status.MessageWaitForMachine || currentStatus.Info == status.MessageInstallingAgent) {
   411  		if err := u.unit.SetUnitStatus(status.Waiting, status.MessageInitializingAgent, nil); err != nil {
   412  			return errors.Trace(err)
   413  		}
   414  	}
   415  	if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil {
   416  		return err
   417  	}
   418  	if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil {
   419  		return errors.Trace(err)
   420  	}
   421  	relations, err := relation.NewRelations(
   422  		u.st, unitTag, u.paths.State.CharmDir,
   423  		u.paths.State.RelationsDir, u.catacomb.Dying(),
   424  	)
   425  	if err != nil {
   426  		return errors.Annotatef(err, "cannot create relations")
   427  	}
   428  	u.relations = relations
   429  	storageAttachments, err := storage.NewAttachments(
   430  		u.st, unitTag, u.paths.State.StorageDir, u.catacomb.Dying(),
   431  	)
   432  	if err != nil {
   433  		return errors.Annotatef(err, "cannot create storage hook source")
   434  	}
   435  	u.storage = storageAttachments
   436  	u.commands = runcommands.NewCommands()
   437  	u.commandChannel = make(chan string)
   438  
   439  	if err := charm.ClearDownloads(u.paths.State.BundlesDir); err != nil {
   440  		logger.Warningf(err.Error())
   441  	}
   442  	deployer, err := charm.NewDeployer(
   443  		u.paths.State.CharmDir,
   444  		u.paths.State.DeployerDir,
   445  		charm.NewBundlesDir(u.paths.State.BundlesDir, u.downloader),
   446  	)
   447  	if err != nil {
   448  		return errors.Annotatef(err, "cannot create deployer")
   449  	}
   450  	contextFactory, err := context.NewContextFactory(
   451  		u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths, u.clock,
   452  	)
   453  	if err != nil {
   454  		return err
   455  	}
   456  	runnerFactory, err := runner.NewFactory(
   457  		u.st, u.paths, contextFactory,
   458  	)
   459  	if err != nil {
   460  		return errors.Trace(err)
   461  	}
   462  	u.operationFactory = operation.NewFactory(operation.FactoryParams{
   463  		Deployer:       deployer,
   464  		RunnerFactory:  runnerFactory,
   465  		Callbacks:      &operationCallbacks{u},
   466  		Abort:          u.catacomb.Dying(),
   467  		MetricSpoolDir: u.paths.GetMetricsSpoolDir(),
   468  	})
   469  
   470  	operationExecutor, err := u.newOperationExecutor(u.paths.State.OperationsFile, u.getServiceCharmURL, u.acquireExecutionLock)
   471  	if err != nil {
   472  		return errors.Trace(err)
   473  	}
   474  	u.operationExecutor = operationExecutor
   475  
   476  	logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket)
   477  	commandRunner, err := NewChannelCommandRunner(ChannelCommandRunnerConfig{
   478  		Abort:          u.catacomb.Dying(),
   479  		Commands:       u.commands,
   480  		CommandChannel: u.commandChannel,
   481  	})
   482  	if err != nil {
   483  		return errors.Annotate(err, "creating command runner")
   484  	}
   485  	u.runListener, err = NewRunListener(RunListenerConfig{
   486  		SocketPath:    u.paths.Runtime.JujuRunSocket,
   487  		CommandRunner: commandRunner,
   488  	})
   489  	if err != nil {
   490  		return errors.Trace(err)
   491  	}
   492  	rlw := newRunListenerWrapper(u.runListener)
   493  	if err := u.catacomb.Add(rlw); err != nil {
   494  		return errors.Trace(err)
   495  	}
   496  	// The socket needs to have permissions 777 in order for other users to use it.
   497  	if jujuos.HostOS() != jujuos.Windows {
   498  		return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777)
   499  	}
   500  	return nil
   501  }
   502  
// Kill asks the uniter to stop by killing its catacomb with a nil error.
// It does not wait for the uniter to finish; use Wait for that.
func (u *Uniter) Kill() {
	u.catacomb.Kill(nil)
}
   506  
// Wait returns the result of waiting on the uniter's catacomb.
func (u *Uniter) Wait() error {
	return u.catacomb.Wait()
}
   510  
   511  func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) {
   512  	// TODO(fwereade): pretty sure there's no reason to make 2 API calls here.
   513  	service, err := u.st.Application(u.unit.ApplicationTag())
   514  	if err != nil {
   515  		return nil, err
   516  	}
   517  	charmURL, _, err := service.CharmURL()
   518  	return charmURL, err
   519  }
   520  
// RunCommands executes the supplied commands in a hook context. It simply
// delegates to the run listener created during init and returns its
// response and error unchanged.
func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) {
	// TODO(axw) drop this when we move the run-listener to an independent
	// worker. This exists purely for the tests.
	return u.runListener.RunCommands(args)
}
   527  
   528  // acquireExecutionLock acquires the machine-level execution lock, and
   529  // returns a func that must be called to unlock it. It's used by operation.Executor
   530  // when running operations that execute external code.
   531  func (u *Uniter) acquireExecutionLock() (mutex.Releaser, error) {
   532  	// We want to make sure we don't block forever when locking, but take the
   533  	// Uniter's catacomb into account.
   534  	spec := mutex.Spec{
   535  		Name:   u.hookLockName,
   536  		Clock:  u.clock,
   537  		Delay:  250 * time.Millisecond,
   538  		Cancel: u.catacomb.Dying(),
   539  	}
   540  	logger.Debugf("acquire lock %q for uniter hook execution", u.hookLockName)
   541  	releaser, err := mutex.Acquire(spec)
   542  	if err != nil {
   543  		return nil, errors.Trace(err)
   544  	}
   545  	logger.Debugf("lock %q acquired", u.hookLockName)
   546  	return releaser, nil
   547  }
   548  
   549  func (u *Uniter) reportHookError(hookInfo hook.Info) error {
   550  	// Set the agent status to "error". We must do this here in case the
   551  	// hook is interrupted (e.g. unit agent crashes), rather than immediately
   552  	// after attempting a runHookOp.
   553  	hookName := string(hookInfo.Kind)
   554  	statusData := map[string]interface{}{}
   555  	if hookInfo.Kind.IsRelation() {
   556  		statusData["relation-id"] = hookInfo.RelationId
   557  		if hookInfo.RemoteUnit != "" {
   558  			statusData["remote-unit"] = hookInfo.RemoteUnit
   559  		}
   560  		relationName, err := u.relations.Name(hookInfo.RelationId)
   561  		if err != nil {
   562  			return errors.Trace(err)
   563  		}
   564  		hookName = fmt.Sprintf("%s-%s", relationName, hookInfo.Kind)
   565  	}
   566  	statusData["hook"] = hookName
   567  	statusMessage := fmt.Sprintf("hook failed: %q", hookName)
   568  	return setAgentStatus(u, status.Error, statusMessage, statusData)
   569  }