github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/worker/uniter/uniter.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/juju/errors"
    14  	"github.com/juju/loggo"
    15  	"github.com/juju/names"
    16  	"github.com/juju/utils/exec"
    17  	"github.com/juju/utils/fslock"
    18  	corecharm "gopkg.in/juju/charm.v5"
    19  	"launchpad.net/tomb"
    20  
    21  	"github.com/juju/juju/api/uniter"
    22  	"github.com/juju/juju/apiserver/params"
    23  	"github.com/juju/juju/version"
    24  	"github.com/juju/juju/worker"
    25  	"github.com/juju/juju/worker/leadership"
    26  	"github.com/juju/juju/worker/uniter/charm"
    27  	"github.com/juju/juju/worker/uniter/filter"
    28  	"github.com/juju/juju/worker/uniter/operation"
    29  	"github.com/juju/juju/worker/uniter/runner"
    30  	"github.com/juju/juju/worker/uniter/runner/jujuc"
    31  	"github.com/juju/juju/worker/uniter/storage"
    32  )
    33  
// logger is the package-level logger; it is registered with loggo under the
// name "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")

// leadershipGuarantee defines the period of time for which a successful call
// to the is-leader hook tool guarantees continued leadership.
var leadershipGuarantee = 30 * time.Second
    39  
// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails.  Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted is the notification for a hook that was executed and
	// succeeded; hookName identifies the hook.
	HookCompleted(hookName string)
	// HookFailed is the notification for a hook that was executed and
	// failed; hookName identifies the hook.
	HookFailed(hookName string)
}
    47  
// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
//
// A Uniter is created and started by NewUniter, which runs loop in its own
// goroutine; the tomb field tracks that goroutine's lifetime and backs the
// Kill, Wait, Stop and Dead methods.
type Uniter struct {
	// Core collaborators: tomb tracks the main goroutine; st is the API
	// facade supplied in UniterParams; paths is derived from the data dir
	// and unit tag; f is the event filter started in loop; unit, relations
	// and storage are populated by init; cleanups are run (in registration
	// order) by runCleanups when loop exits.
	tomb      tomb.Tomb
	st        *uniter.State
	paths     Paths
	f         filter.Filter
	unit      *uniter.Unit
	relations Relations
	cleanups  []cleanup
	storage   *storage.Attachments

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  params.Status
	lastReportedMessage string

	// Operation machinery, all set up by init: runOperation uses
	// operationFactory to create operations and operationExecutor to run
	// them; newOperationExecutor is the constructor supplied by the caller
	// of NewUniter.
	deployer             *deployerProxy
	operationFactory     operation.Factory
	operationExecutor    operation.Executor
	newOperationExecutor NewExecutorFunc

	// leadershipTracker is supplied in UniterParams and handed to the
	// runner context factory in init.
	leadershipTracker leadership.Tracker

	// hookLock is the machine-level execution lock (see
	// acquireExecutionLock); runListener is the juju-run listener created
	// in init and closed by a registered cleanup.
	hookLock    *fslock.Lock
	runListener *RunListener

	// NOTE(review): not referenced in this part of the file; presumably
	// these record whether the corresponding hooks have already been run --
	// confirm against the mode funcs.
	ranLeaderSettingsChanged bool
	ranConfigChanged         bool

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// metricsTimerChooser is a struct that allows metrics to switch between
	// active and inactive timers.
	metricsTimerChooser *timerChooser

	// collectMetricsAt defines a function that will be used to generate signals
	// for the collect-metrics hook.
	collectMetricsAt TimedSignal

	// sendMetricsAt defines a function that will be used to generate signals
	// to send metrics to the state server.
	sendMetricsAt TimedSignal

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt TimedSignal
}
   101  
// UniterParams hold all the necessary parameters for a new Uniter.
//
// NewUniter consumes the fields as follows: UniterFacade becomes the uniter's
// API facade; UnitTag and DataDir are combined (via NewPaths) into the
// uniter's paths, and UnitTag also identifies the unit to run; MachineLock
// becomes the hook execution lock; LeadershipTracker, MetricsTimerChooser,
// UpdateStatusSignal and NewOperationExecutor are stored as-is.
// MetricsTimerChooser must be non-nil, because NewUniter reads its inactive
// timer to seed the metrics signals.
type UniterParams struct {
	UniterFacade         *uniter.State
	UnitTag              names.UnitTag
	LeadershipTracker    leadership.Tracker
	DataDir              string
	MachineLock          *fslock.Lock
	MetricsTimerChooser  *timerChooser
	UpdateStatusSignal   TimedSignal
	NewOperationExecutor NewExecutorFunc
}
   113  
// NewExecutorFunc is the constructor signature for an operation.Executor.
// The uniter's init calls it with, in order: the path of the operations state
// file, a func that returns the service's charm URL, and a func that acquires
// the execution lock with a given message and returns the matching unlock
// func.
type NewExecutorFunc func(string, func() (*corecharm.URL, error), func(string) (func() error, error)) (operation.Executor, error)
   115  
   116  // NewUniter creates a new Uniter which will install, run, and upgrade
   117  // a charm on behalf of the unit with the given unitTag, by executing
   118  // hooks and operations provoked by changes in st.
   119  func NewUniter(uniterParams *UniterParams) *Uniter {
   120  	u := &Uniter{
   121  		st:                   uniterParams.UniterFacade,
   122  		paths:                NewPaths(uniterParams.DataDir, uniterParams.UnitTag),
   123  		hookLock:             uniterParams.MachineLock,
   124  		leadershipTracker:    uniterParams.LeadershipTracker,
   125  		metricsTimerChooser:  uniterParams.MetricsTimerChooser,
   126  		collectMetricsAt:     uniterParams.MetricsTimerChooser.inactive,
   127  		sendMetricsAt:        uniterParams.MetricsTimerChooser.inactive,
   128  		updateStatusAt:       uniterParams.UpdateStatusSignal,
   129  		newOperationExecutor: uniterParams.NewOperationExecutor,
   130  	}
   131  	go func() {
   132  		defer u.tomb.Done()
   133  		defer u.runCleanups()
   134  		u.tomb.Kill(u.loop(uniterParams.UnitTag))
   135  	}()
   136  	return u
   137  }
   138  
// cleanup is a teardown func registered with addCleanup; runCleanups calls
// each one and passes its error to the uniter's tomb.
type cleanup func() error
   140  
// addCleanup registers cleanup to be called by runCleanups. Cleanups are
// appended, so they are later run in registration order.
func (u *Uniter) addCleanup(cleanup cleanup) {
	u.cleanups = append(u.cleanups, cleanup)
}
   144  
   145  func (u *Uniter) runCleanups() {
   146  	for _, cleanup := range u.cleanups {
   147  		u.tomb.Kill(cleanup())
   148  	}
   149  }
   150  
   151  func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
   152  	if err := u.init(unitTag); err != nil {
   153  		if err == worker.ErrTerminateAgent {
   154  			return err
   155  		}
   156  		return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err)
   157  	}
   158  	logger.Infof("unit %q started", u.unit)
   159  
   160  	// Start filtering state change events for consumption by modes.
   161  	u.f, err = filter.NewFilter(u.st, unitTag)
   162  	if err != nil {
   163  		return err
   164  	}
   165  	u.addCleanup(u.f.Stop)
   166  
   167  	// Stop the uniter if the filter fails.
   168  	go func() { u.tomb.Kill(u.f.Wait()) }()
   169  
   170  	// Start handling leader settings events, or not, as appropriate.
   171  	u.f.WantLeaderSettingsEvents(!u.operationState().Leader)
   172  
   173  	// Run modes until we encounter an error.
   174  	mode := ModeContinue
   175  	for err == nil {
   176  		select {
   177  		case <-u.tomb.Dying():
   178  			err = tomb.ErrDying
   179  		default:
   180  			mode, err = mode(u)
   181  			switch cause := errors.Cause(err); cause {
   182  			case operation.ErrNeedsReboot:
   183  				err = worker.ErrRebootMachine
   184  			case tomb.ErrDying, worker.ErrTerminateAgent:
   185  				err = cause
   186  			case operation.ErrHookFailed:
   187  				mode, err = ModeHookError, nil
   188  			default:
   189  				charmURL, ok := operation.DeployConflictCharmURL(cause)
   190  				if ok {
   191  					mode, err = ModeConflicted(charmURL), nil
   192  				}
   193  			}
   194  		}
   195  	}
   196  
   197  	logger.Infof("unit %q shutting down: %s", u.unit, err)
   198  	return err
   199  }
   200  
   201  func (u *Uniter) setupLocks() (err error) {
   202  	if message := u.hookLock.Message(); u.hookLock.IsLocked() && message != "" {
   203  		// Look to see if it was us that held the lock before.  If it was, we
   204  		// should be safe enough to break it, as it is likely that we died
   205  		// before unlocking, and have been restarted by the init system.
   206  		parts := strings.SplitN(message, ":", 2)
   207  		if len(parts) > 1 && parts[0] == u.unit.Name() {
   208  			if err := u.hookLock.BreakLock(); err != nil {
   209  				return err
   210  			}
   211  		}
   212  	}
   213  	return nil
   214  }
   215  
// init prepares the uniter to run on behalf of unitTag. In order, it: fetches
// the unit (bailing out with worker.ErrTerminateAgent if it is already Dead),
// breaks any stale hook lock, ensures the jujuc symlinks and the relations
// state directory exist, creates the relations and storage trackers, the
// charm deployer, the runner/operation factories and the operation executor,
// and finally starts the juju-run listener. Resources that need explicit
// shutdown are registered with addCleanup as they are created.
func (u *Uniter) init(unitTag names.UnitTag) (err error) {
	u.unit, err = u.st.Unit(unitTag)
	if err != nil {
		return err
	}
	if u.unit.Life() == params.Dead {
		// If we started up already dead, we should not progress further. If we
		// become Dead immediately after starting up, we may well complete any
		// operations in progress before detecting it; but that race is fundamental
		// and inescapable, whereas this one is not.
		return worker.ErrTerminateAgent
	}
	// Break a hook lock left over from a previous run of this same unit.
	if err = u.setupLocks(); err != nil {
		return err
	}
	if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil {
		return err
	}
	if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil {
		return errors.Trace(err)
	}
	// Both trackers below are given the tomb's Dying channel.
	relations, err := newRelations(u.st, unitTag, u.paths, u.tomb.Dying())
	if err != nil {
		return errors.Annotatef(err, "cannot create relations")
	}
	u.relations = relations
	storageAttachments, err := storage.NewAttachments(
		u.st, unitTag, u.paths.State.StorageDir, u.tomb.Dying(),
	)
	if err != nil {
		return errors.Annotatef(err, "cannot create storage hook source")
	}
	u.storage = storageAttachments
	u.addCleanup(storageAttachments.Stop)

	deployer, err := charm.NewDeployer(
		u.paths.State.CharmDir,
		u.paths.State.DeployerDir,
		charm.NewBundlesDir(u.paths.State.BundlesDir),
	)
	if err != nil {
		return errors.Annotatef(err, "cannot create deployer")
	}
	u.deployer = &deployerProxy{deployer}
	contextFactory, err := runner.NewContextFactory(
		u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths,
	)
	if err != nil {
		return err
	}
	runnerFactory, err := runner.NewFactory(
		u.st, u.paths, contextFactory,
	)
	if err != nil {
		return err
	}
	u.operationFactory = operation.NewFactory(operation.FactoryParams{
		Deployer:       u.deployer,
		RunnerFactory:  runnerFactory,
		Callbacks:      &operationCallbacks{u},
		StorageUpdater: u.storage,
		Abort:          u.tomb.Dying(),
		MetricSender:   u.unit,
		MetricSpoolDir: u.paths.GetMetricsSpoolDir(),
	})

	// The executor is built by the injected constructor, which receives the
	// operations state file plus the charm-URL and execution-lock callbacks.
	operationExecutor, err := u.newOperationExecutor(u.paths.State.OperationsFile, u.getServiceCharmURL, u.acquireExecutionLock)
	if err != nil {
		return err
	}
	u.operationExecutor = operationExecutor

	logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket)
	u.runListener, err = NewRunListener(u, u.paths.Runtime.JujuRunSocket)
	if err != nil {
		return err
	}
	u.addCleanup(func() error {
		// TODO(fwereade): RunListener returns no error on Close. This seems wrong.
		u.runListener.Close()
		return nil
	})
	// The socket needs to have permissions 777 in order for other users to use it.
	// The chmod is skipped on Windows.
	if version.Current.OS != version.Windows {
		return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777)
	}
	return nil
}
   304  
// Kill asks the uniter to shut down by killing its tomb with a nil reason.
// It does not wait for the shutdown to complete; use Wait or Stop for that.
func (u *Uniter) Kill() {
	u.tomb.Kill(nil)
}
   308  
// Wait returns the result of waiting on the uniter's tomb, i.e. it reports
// the error the uniter stopped with once the tomb is dead.
func (u *Uniter) Wait() error {
	return u.tomb.Wait()
}
   312  
   313  func (u *Uniter) Stop() error {
   314  	u.tomb.Kill(nil)
   315  	return u.Wait()
   316  }
   317  
// Dead exposes the tomb's Dead channel, letting callers select on the
// uniter's termination.
func (u *Uniter) Dead() <-chan struct{} {
	return u.tomb.Dead()
}
   321  
   322  func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) {
   323  	// TODO(fwereade): pretty sure there's no reason to make 2 API calls here.
   324  	service, err := u.st.Service(u.unit.ServiceTag())
   325  	if err != nil {
   326  		return nil, err
   327  	}
   328  	charmURL, _, err := service.CharmURL()
   329  	return charmURL, err
   330  }
   331  
// operationState returns the operation executor's current state. It must not
// be called before init has set operationExecutor.
func (u *Uniter) operationState() operation.State {
	return u.operationExecutor.State()
}
   335  
   336  // initializeMetricsTimers enables the periodic collect-metrics hook
   337  // and periodic sending of collected metrics for charms that declare metrics.
   338  func (u *Uniter) initializeMetricsTimers() error {
   339  	charm, err := corecharm.ReadCharmDir(u.paths.State.CharmDir)
   340  	if err != nil {
   341  		return err
   342  	}
   343  	u.collectMetricsAt = u.metricsTimerChooser.getCollectMetricsTimer(charm)
   344  	u.sendMetricsAt = u.metricsTimerChooser.getSendMetricsTimer(charm)
   345  	return nil
   346  }
   347  
   348  // RunCommands executes the supplied commands in a hook context.
   349  func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) {
   350  	// TODO(fwereade): this is *still* all sorts of messed-up and not especially
   351  	// goroutine-safe, but that's not what I'm fixing at the moment. We could
   352  	// address this by:
   353  	//  1) implementing an operation to encapsulate the relations.Update call
   354  	//  2) (quick+dirty) mutex runOperation until we can
   355  	//  3) (correct) feed RunCommands requests into the mode funcs (or any queue
   356  	//     that replaces them) such that they're handled and prioritised like
   357  	//     every other operation.
   358  	logger.Tracef("run commands: %s", args.Commands)
   359  
   360  	type responseInfo struct {
   361  		response *exec.ExecResponse
   362  		err      error
   363  	}
   364  	responseChan := make(chan responseInfo, 1)
   365  	sendResponse := func(response *exec.ExecResponse, err error) {
   366  		responseChan <- responseInfo{response, err}
   367  	}
   368  
   369  	commandArgs := operation.CommandArgs{
   370  		Commands:        args.Commands,
   371  		RelationId:      args.RelationId,
   372  		RemoteUnitName:  args.RemoteUnitName,
   373  		ForceRemoteUnit: args.ForceRemoteUnit,
   374  	}
   375  	err = u.runOperation(newCommandsOp(commandArgs, sendResponse))
   376  	if err == nil {
   377  		select {
   378  		case response := <-responseChan:
   379  			results, err = response.response, response.err
   380  		default:
   381  			err = errors.New("command response never sent")
   382  		}
   383  	}
   384  	if errors.Cause(err) == operation.ErrNeedsReboot {
   385  		u.tomb.Kill(worker.ErrRebootMachine)
   386  		err = nil
   387  	}
   388  	if err != nil {
   389  		u.tomb.Kill(err)
   390  	}
   391  	return results, err
   392  }
   393  
   394  // runOperation uses the uniter's operation factory to run the supplied creation
   395  // func, and then runs the resulting operation.
   396  //
   397  // This has a number of advantages over having mode funcs use the factory and
   398  // executor directly:
   399  //   * it cuts down on duplicated code in the mode funcs, making the logic easier
   400  //     to parse
   401  //   * it narrows the (conceptual) interface exposed to the mode funcs -- one day
   402  //     we might even be able to use a (real) interface and maybe even approach a
   403  //     point where we can run direct unit tests(!) on the modes themselves.
   404  //   * it opens a path to fixing RunCommands -- all operation creation and
   405  //     execution is done in a single place, and it's much easier to force those
   406  //     onto a single thread.
   407  //       * this can't be done quite yet, though, because relation changes are
   408  //         not yet encapsulated in operations, and that needs to happen before
   409  //         RunCommands will *actually* be goroutine-safe.
   410  func (u *Uniter) runOperation(creator creator) (err error) {
   411  	errorMessage := "creating operation to run"
   412  	defer func() {
   413  		reportAgentError(u, errorMessage, err)
   414  	}()
   415  	op, err := creator(u.operationFactory)
   416  	if err != nil {
   417  		return errors.Annotatef(err, "cannot create operation")
   418  	}
   419  	errorMessage = op.String()
   420  	before := u.operationState()
   421  	defer func() {
   422  		// Check that if we lose leadership as a result of this
   423  		// operation, we want to start getting leader settings events,
   424  		// or if we gain leadership we want to stop receiving those
   425  		// events.
   426  		if after := u.operationState(); before.Leader != after.Leader {
   427  			u.f.WantLeaderSettingsEvents(before.Leader)
   428  		}
   429  	}()
   430  	return u.operationExecutor.Run(op)
   431  }
   432  
   433  // acquireExecutionLock acquires the machine-level execution lock, and
   434  // returns a func that must be called to unlock it. It's used by operation.Executor
   435  // when running operations that execute external code.
   436  func (u *Uniter) acquireExecutionLock(message string) (func() error, error) {
   437  	// We want to make sure we don't block forever when locking, but take the
   438  	// Uniter's tomb into account.
   439  	checkTomb := func() error {
   440  		select {
   441  		case <-u.tomb.Dying():
   442  			return tomb.ErrDying
   443  		default:
   444  			return nil
   445  		}
   446  	}
   447  	message = fmt.Sprintf("%s: %s", u.unit.Name(), message)
   448  	if err := u.hookLock.LockWithFunc(message, checkTomb); err != nil {
   449  		return nil, err
   450  	}
   451  	return func() error { return u.hookLock.Unlock() }, nil
   452  }