github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/worker/uniter/uniter.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package uniter
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/juju/errors"
    14  	"github.com/juju/loggo"
    15  	"github.com/juju/names"
    16  	"github.com/juju/utils/exec"
    17  	"github.com/juju/utils/fslock"
    18  	corecharm "gopkg.in/juju/charm.v5"
    19  	"launchpad.net/tomb"
    20  
    21  	"github.com/juju/juju/api/uniter"
    22  	"github.com/juju/juju/apiserver/params"
    23  	coreleadership "github.com/juju/juju/leadership"
    24  	"github.com/juju/juju/version"
    25  	"github.com/juju/juju/worker"
    26  	"github.com/juju/juju/worker/leadership"
    27  	"github.com/juju/juju/worker/uniter/charm"
    28  	"github.com/juju/juju/worker/uniter/filter"
    29  	"github.com/juju/juju/worker/uniter/operation"
    30  	"github.com/juju/juju/worker/uniter/runner"
    31  	"github.com/juju/juju/worker/uniter/runner/jujuc"
    32  	"github.com/juju/juju/worker/uniter/storage"
    33  )
    34  
// logger is the package-level logger, registered under the name
// "juju.worker.uniter".
var logger = loggo.GetLogger("juju.worker.uniter")

// leadershipGuarantee defines the period of time for which a successful call
// to the is-leader hook tool guarantees continued leadership. It is handed to
// the leadership tracker that the uniter starts in loop.
var leadershipGuarantee = 30 * time.Second
    40  
// A UniterExecutionObserver gets the appropriate methods called when a hook
// is executed and either succeeds or fails.  Missing hooks don't get reported
// in this way.
type UniterExecutionObserver interface {
	// HookCompleted is called with the name of a hook that was executed
	// and succeeded.
	HookCompleted(hookName string)
	// HookFailed is called with the name of a hook that was executed and
	// failed.
	HookFailed(hookName string)
}
    48  
// Uniter implements the capabilities of the unit agent. It is not intended to
// implement the actual *behaviour* of the unit agent; that responsibility is
// delegated to Mode values, which are expected to react to events and direct
// the uniter's responses to them.
type Uniter struct {
	// tomb tracks the lifetime of the goroutine started by NewUniter; Kill,
	// Wait, Stop and Dead all delegate to it. st is the API connection used
	// to look up the unit and its service; paths is derived from the data
	// dir and unit tag; f is the event filter created in loop; unit is
	// fetched in init; relations and storage are created in init; cleanups
	// are the teardown funcs run, in registration order, when loop exits.
	tomb      tomb.Tomb
	st        *uniter.State
	paths     Paths
	f         filter.Filter
	unit      *uniter.Unit
	relations Relations
	cleanups  []cleanup
	storage   *storage.Attachments

	// Cache the last reported status information
	// so we don't make unnecessary api calls.
	setStatusMutex      sync.Mutex
	lastReportedStatus  params.Status
	lastReportedMessage string

	// deployer, operationFactory and operationExecutor are all built in
	// init; runOperation creates operations with the factory and runs them
	// with the executor.
	deployer          *deployerProxy
	operationFactory  operation.Factory
	operationExecutor operation.Executor

	// leadershipManager is supplied by the caller of NewUniter and is used
	// to construct leadershipTracker at the start of loop.
	leadershipManager coreleadership.LeadershipManager
	leadershipTracker leadership.Tracker

	// hookLock is the machine-level execution lock taken by
	// acquireExecutionLock; runListener is the juju-run listener started at
	// the end of init.
	hookLock    *fslock.Lock
	runListener *RunListener

	// NOTE(review): these flags are not referenced in this part of the
	// file; presumably they record whether the corresponding hooks have
	// already run -- confirm against the mode funcs.
	ranLeaderSettingsChanged bool
	ranConfigChanged         bool

	// The execution observer is only used in tests at this stage. Should this
	// need to be extended, perhaps a list of observers would be needed.
	observer UniterExecutionObserver

	// metricsTimerChooser is a struct that allows metrics to switch between
	// active and inactive timers.
	metricsTimerChooser *timerChooser

	// collectMetricsAt defines a function that will be used to generate signals
	// for the collect-metrics hook.
	collectMetricsAt TimedSignal

	// updateStatusAt defines a function that will be used to generate signals for
	// the update-status hook
	updateStatusAt TimedSignal
}
    98  
// UniterParams hold all the necessary parameters for a new Uniter.
//
// NewUniter uses the fields as follows: St, HookLock and LeadershipManager
// are stored on the Uniter as is; DataDir and UnitTag are combined by
// NewPaths into the uniter's paths, and UnitTag also identifies the unit to
// run; MetricsTimerChooser is stored and its inactive timer becomes the
// initial collect-metrics signal, so it must not be nil; UpdateStatusSignal
// becomes the update-status signal.
type UniterParams struct {
	St                  *uniter.State
	UnitTag             names.UnitTag
	LeadershipManager   coreleadership.LeadershipManager
	DataDir             string
	HookLock            *fslock.Lock
	MetricsTimerChooser *timerChooser
	UpdateStatusSignal  TimedSignal
}
   109  
   110  // NewUniter creates a new Uniter which will install, run, and upgrade
   111  // a charm on behalf of the unit with the given unitTag, by executing
   112  // hooks and operations provoked by changes in st.
   113  func NewUniter(uniterParams *UniterParams) *Uniter {
   114  	u := &Uniter{
   115  		st:                  uniterParams.St,
   116  		paths:               NewPaths(uniterParams.DataDir, uniterParams.UnitTag),
   117  		hookLock:            uniterParams.HookLock,
   118  		leadershipManager:   uniterParams.LeadershipManager,
   119  		metricsTimerChooser: uniterParams.MetricsTimerChooser,
   120  		collectMetricsAt:    uniterParams.MetricsTimerChooser.inactive,
   121  		updateStatusAt:      uniterParams.UpdateStatusSignal,
   122  	}
   123  	go func() {
   124  		defer u.tomb.Done()
   125  		defer u.runCleanups()
   126  		u.tomb.Kill(u.loop(uniterParams.UnitTag))
   127  	}()
   128  	return u
   129  }
   130  
// cleanup is a teardown func registered via addCleanup; runCleanups calls
// each one when the uniter's loop exits and passes its error to the tomb.
type cleanup func() error
   132  
// addCleanup registers cleanup to be called by runCleanups. Cleanups are
// appended, so they later run in the order they were registered.
func (u *Uniter) addCleanup(cleanup cleanup) {
	u.cleanups = append(u.cleanups, cleanup)
}
   136  
   137  func (u *Uniter) runCleanups() {
   138  	for _, cleanup := range u.cleanups {
   139  		u.tomb.Kill(cleanup())
   140  	}
   141  }
   142  
   143  func (u *Uniter) loop(unitTag names.UnitTag) (err error) {
   144  	// Start tracking leadership state.
   145  	// TODO(fwereade): ideally, this wouldn't be created here; as a worker it's
   146  	// clearly better off being managed by a Runner. However, we haven't come up
   147  	// with a clean way to reference one (lineage of a...) worker from another,
   148  	// so for now the tracker is accessible only to its unit.
   149  	leadershipTracker := leadership.NewTrackerWorker(
   150  		unitTag, u.leadershipManager, leadershipGuarantee,
   151  	)
   152  	u.addCleanup(func() error {
   153  		return worker.Stop(leadershipTracker)
   154  	})
   155  	u.leadershipTracker = leadershipTracker
   156  
   157  	if err := u.init(unitTag); err != nil {
   158  		if err == worker.ErrTerminateAgent {
   159  			return err
   160  		}
   161  		return fmt.Errorf("failed to initialize uniter for %q: %v", unitTag, err)
   162  	}
   163  	logger.Infof("unit %q started", u.unit)
   164  
   165  	// Start filtering state change events for consumption by modes.
   166  	u.f, err = filter.NewFilter(u.st, unitTag)
   167  	if err != nil {
   168  		return err
   169  	}
   170  	u.addCleanup(u.f.Stop)
   171  
   172  	// Stop the uniter if either of these components fails.
   173  	go func() { u.tomb.Kill(leadershipTracker.Wait()) }()
   174  	go func() { u.tomb.Kill(u.f.Wait()) }()
   175  
   176  	// Start handling leader settings events, or not, as appropriate.
   177  	u.f.WantLeaderSettingsEvents(!u.operationState().Leader)
   178  
   179  	// Run modes until we encounter an error.
   180  	mode := ModeContinue
   181  	for err == nil {
   182  		select {
   183  		case <-u.tomb.Dying():
   184  			err = tomb.ErrDying
   185  		default:
   186  			mode, err = mode(u)
   187  			switch cause := errors.Cause(err); cause {
   188  			case operation.ErrNeedsReboot:
   189  				err = worker.ErrRebootMachine
   190  			case tomb.ErrDying, worker.ErrTerminateAgent:
   191  				err = cause
   192  			case operation.ErrHookFailed:
   193  				mode, err = ModeHookError, nil
   194  			default:
   195  				charmURL, ok := operation.DeployConflictCharmURL(cause)
   196  				if ok {
   197  					mode, err = ModeConflicted(charmURL), nil
   198  				}
   199  			}
   200  		}
   201  	}
   202  	logger.Infof("unit %q shutting down: %s", u.unit, err)
   203  	return err
   204  }
   205  
   206  func (u *Uniter) setupLocks() (err error) {
   207  	if message := u.hookLock.Message(); u.hookLock.IsLocked() && message != "" {
   208  		// Look to see if it was us that held the lock before.  If it was, we
   209  		// should be safe enough to break it, as it is likely that we died
   210  		// before unlocking, and have been restarted by the init system.
   211  		parts := strings.SplitN(message, ":", 2)
   212  		if len(parts) > 1 && parts[0] == u.unit.Name() {
   213  			if err := u.hookLock.BreakLock(); err != nil {
   214  				return err
   215  			}
   216  		}
   217  	}
   218  	return nil
   219  }
   220  
   221  func (u *Uniter) init(unitTag names.UnitTag) (err error) {
   222  	u.unit, err = u.st.Unit(unitTag)
   223  	if err != nil {
   224  		return err
   225  	}
   226  	if u.unit.Life() == params.Dead {
   227  		// If we started up already dead, we should not progress further. If we
   228  		// become Dead immediately after starting up, we may well complete any
   229  		// operations in progress before detecting it; but that race is fundamental
   230  		// and inescapable, whereas this one is not.
   231  		return worker.ErrTerminateAgent
   232  	}
   233  	if err = u.setupLocks(); err != nil {
   234  		return err
   235  	}
   236  	if err := jujuc.EnsureSymlinks(u.paths.ToolsDir); err != nil {
   237  		return err
   238  	}
   239  	if err := os.MkdirAll(u.paths.State.RelationsDir, 0755); err != nil {
   240  		return errors.Trace(err)
   241  	}
   242  	relations, err := newRelations(u.st, unitTag, u.paths, u.tomb.Dying())
   243  	if err != nil {
   244  		return errors.Annotatef(err, "cannot create relations")
   245  	}
   246  	u.relations = relations
   247  	storageAttachments, err := storage.NewAttachments(
   248  		u.st, unitTag, u.paths.State.StorageDir, u.tomb.Dying(),
   249  	)
   250  	if err != nil {
   251  		return errors.Annotatef(err, "cannot create storage hook source")
   252  	}
   253  	u.storage = storageAttachments
   254  	u.addCleanup(storageAttachments.Stop)
   255  
   256  	deployer, err := charm.NewDeployer(
   257  		u.paths.State.CharmDir,
   258  		u.paths.State.DeployerDir,
   259  		charm.NewBundlesDir(u.paths.State.BundlesDir),
   260  	)
   261  	if err != nil {
   262  		return errors.Annotatef(err, "cannot create deployer")
   263  	}
   264  	u.deployer = &deployerProxy{deployer}
   265  	runnerFactory, err := runner.NewFactory(
   266  		u.st, unitTag, u.leadershipTracker, u.relations.GetInfo, u.storage, u.paths,
   267  	)
   268  	if err != nil {
   269  		return err
   270  	}
   271  	u.operationFactory = operation.NewFactory(
   272  		u.deployer,
   273  		runnerFactory,
   274  		&operationCallbacks{u},
   275  		u.storage,
   276  		u.tomb.Dying(),
   277  	)
   278  
   279  	operationExecutor, err := operation.NewExecutor(
   280  		u.paths.State.OperationsFile, u.getServiceCharmURL, u.acquireExecutionLock,
   281  	)
   282  	if err != nil {
   283  		return err
   284  	}
   285  	u.operationExecutor = operationExecutor
   286  
   287  	logger.Debugf("starting juju-run listener on unix:%s", u.paths.Runtime.JujuRunSocket)
   288  	u.runListener, err = NewRunListener(u, u.paths.Runtime.JujuRunSocket)
   289  	if err != nil {
   290  		return err
   291  	}
   292  	u.addCleanup(func() error {
   293  		// TODO(fwereade): RunListener returns no error on Close. This seems wrong.
   294  		u.runListener.Close()
   295  		return nil
   296  	})
   297  	// The socket needs to have permissions 777 in order for other users to use it.
   298  	if version.Current.OS != version.Windows {
   299  		return os.Chmod(u.paths.Runtime.JujuRunSocket, 0777)
   300  	}
   301  	return nil
   302  }
   303  
// Kill asks the uniter to stop by killing its tomb with a nil reason. It
// does not wait for the uniter to finish; use Wait or Stop for that.
func (u *Uniter) Kill() {
	u.tomb.Kill(nil)
}
   307  
// Wait returns the result of waiting on the uniter's tomb, i.e. it blocks
// until the goroutine started by NewUniter has marked the tomb done.
func (u *Uniter) Wait() error {
	return u.tomb.Wait()
}
   311  
   312  func (u *Uniter) Stop() error {
   313  	u.tomb.Kill(nil)
   314  	return u.Wait()
   315  }
   316  
// Dead returns the Dead channel of the uniter's tomb, which callers can
// select on to learn that the uniter has finished.
func (u *Uniter) Dead() <-chan struct{} {
	return u.tomb.Dead()
}
   320  
   321  func (u *Uniter) getServiceCharmURL() (*corecharm.URL, error) {
   322  	// TODO(fwereade): pretty sure there's no reason to make 2 API calls here.
   323  	service, err := u.st.Service(u.unit.ServiceTag())
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  	charmURL, _, err := service.CharmURL()
   328  	return charmURL, err
   329  }
   330  
// operationState returns the operation executor's current state. The
// executor is only set up by init, so this must not be called before then.
func (u *Uniter) operationState() operation.State {
	return u.operationExecutor.State()
}
   334  
   335  // initializeMetricsCollector enables the periodic collect-metrics hook
   336  // for charms that declare metrics.
   337  func (u *Uniter) initializeMetricsCollector() error {
   338  	charm, err := corecharm.ReadCharmDir(u.paths.State.CharmDir)
   339  	if err != nil {
   340  		return err
   341  	}
   342  	u.collectMetricsAt = u.metricsTimerChooser.getMetricsTimer(charm)
   343  	return nil
   344  }
   345  
   346  // RunCommands executes the supplied commands in a hook context.
   347  func (u *Uniter) RunCommands(args RunCommandsArgs) (results *exec.ExecResponse, err error) {
   348  	// TODO(fwereade): this is *still* all sorts of messed-up and not especially
   349  	// goroutine-safe, but that's not what I'm fixing at the moment. We could
   350  	// address this by:
   351  	//  1) implementing an operation to encapsulate the relations.Update call
   352  	//  2) (quick+dirty) mutex runOperation until we can
   353  	//  3) (correct) feed RunCommands requests into the mode funcs (or any queue
   354  	//     that replaces them) such that they're handled and prioritised like
   355  	//     every other operation.
   356  	logger.Tracef("run commands: %s", args.Commands)
   357  
   358  	type responseInfo struct {
   359  		response *exec.ExecResponse
   360  		err      error
   361  	}
   362  	responseChan := make(chan responseInfo, 1)
   363  	sendResponse := func(response *exec.ExecResponse, err error) {
   364  		responseChan <- responseInfo{response, err}
   365  	}
   366  
   367  	commandArgs := operation.CommandArgs{
   368  		Commands:        args.Commands,
   369  		RelationId:      args.RelationId,
   370  		RemoteUnitName:  args.RemoteUnitName,
   371  		ForceRemoteUnit: args.ForceRemoteUnit,
   372  	}
   373  	err = u.runOperation(newCommandsOp(commandArgs, sendResponse))
   374  	if err == nil {
   375  		select {
   376  		case response := <-responseChan:
   377  			results, err = response.response, response.err
   378  		default:
   379  			err = errors.New("command response never sent")
   380  		}
   381  	}
   382  	if errors.Cause(err) == operation.ErrNeedsReboot {
   383  		u.tomb.Kill(worker.ErrRebootMachine)
   384  		err = nil
   385  	}
   386  	if err != nil {
   387  		u.tomb.Kill(err)
   388  	}
   389  	return results, err
   390  }
   391  
   392  // runOperation uses the uniter's operation factory to run the supplied creation
   393  // func, and then runs the resulting operation.
   394  //
   395  // This has a number of advantages over having mode funcs use the factory and
   396  // executor directly:
   397  //   * it cuts down on duplicated code in the mode funcs, making the logic easier
   398  //     to parse
   399  //   * it narrows the (conceptual) interface exposed to the mode funcs -- one day
   400  //     we might even be able to use a (real) interface and maybe even approach a
   401  //     point where we can run direct unit tests(!) on the modes themselves.
   402  //   * it opens a path to fixing RunCommands -- all operation creation and
   403  //     execution is done in a single place, and it's much easier to force those
   404  //     onto a single thread.
   405  //       * this can't be done quite yet, though, because relation changes are
   406  //         not yet encapsulated in operations, and that needs to happen before
   407  //         RunCommands will *actually* be goroutine-safe.
   408  func (u *Uniter) runOperation(creator creator) error {
   409  	op, err := creator(u.operationFactory)
   410  	if err != nil {
   411  		return errors.Annotatef(err, "cannot create operation")
   412  	}
   413  	before := u.operationState()
   414  	defer func() {
   415  		// Check that if we lose leadership as a result of this
   416  		// operation, we want to start getting leader settings events,
   417  		// or if we gain leadership we want to stop receiving those
   418  		// events.
   419  		if after := u.operationState(); before.Leader != after.Leader {
   420  			u.f.WantLeaderSettingsEvents(before.Leader)
   421  		}
   422  	}()
   423  	return u.operationExecutor.Run(op)
   424  }
   425  
   426  // acquireExecutionLock acquires the machine-level execution lock, and
   427  // returns a func that must be called to unlock it. It's used by operation.Executor
   428  // when running operations that execute external code.
   429  func (u *Uniter) acquireExecutionLock(message string) (func() error, error) {
   430  	// We want to make sure we don't block forever when locking, but take the
   431  	// Uniter's tomb into account.
   432  	checkTomb := func() error {
   433  		select {
   434  		case <-u.tomb.Dying():
   435  			return tomb.ErrDying
   436  		default:
   437  			return nil
   438  		}
   439  	}
   440  	message = fmt.Sprintf("%s: %s", u.unit.Name(), message)
   441  	if err := u.hookLock.LockWithFunc(message, checkTomb); err != nil {
   442  		return nil, err
   443  	}
   444  	return func() error { return u.hookLock.Unlock() }, nil
   445  }