github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/upgradeseries/worker.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package upgradeseries
     5  
import (
	"sort"
	"strings"
	"sync"

	"github.com/juju/errors"
	"github.com/juju/os/series"
	"gopkg.in/juju/names.v2"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/core/model"
	"github.com/juju/juju/service"
)
    20  
    21  //go:generate mockgen -package mocks -destination mocks/package_mock.go github.com/juju/juju/worker/upgradeseries Facade,Logger,AgentService,ServiceAccess,Upgrader
    22  
    23  var hostSeries = series.HostSeries
    24  
    25  // Logger represents the methods required to emit log messages.
    26  type Logger interface {
    27  	Debugf(message string, args ...interface{})
    28  	Infof(message string, args ...interface{})
    29  	Warningf(message string, args ...interface{})
    30  	Errorf(message string, args ...interface{})
    31  }
    32  
    33  // Config is the configuration needed to construct an UpgradeSeries worker.
    34  type Config struct {
    35  	// FacadeFactory is used to acquire back-end state with
    36  	// the input tag context.
    37  	FacadeFactory func(names.Tag) Facade
    38  
    39  	// Logger is the logger for this worker.
    40  	Logger Logger
    41  
    42  	// Tag is the current machine tag.
    43  	Tag names.Tag
    44  
    45  	// ServiceAccess provides access to the local init system.
    46  	Service ServiceAccess
    47  
    48  	// UpgraderFactory is a factory method that will return an upgrader capable
    49  	// of handling service and agent binary manipulation for a
    50  	// runtime-determined target OS series.
    51  	UpgraderFactory func(string) (Upgrader, error)
    52  }
    53  
    54  // Validate validates the upgrade-series worker configuration.
    55  func (config Config) Validate() error {
    56  	if config.Logger == nil {
    57  		return errors.NotValidf("nil Logger")
    58  	}
    59  	if config.Tag == nil {
    60  		return errors.NotValidf("nil machine tag")
    61  	}
    62  	k := config.Tag.Kind()
    63  	if k != names.MachineTagKind {
    64  		return errors.NotValidf("%q tag kind", k)
    65  	}
    66  	if config.FacadeFactory == nil {
    67  		return errors.NotValidf("nil FacadeFactory")
    68  	}
    69  	if config.Service == nil {
    70  		return errors.NotValidf("nil Service")
    71  	}
    72  	return nil
    73  }
    74  
    75  // upgradeSeriesWorker is responsible for machine and unit agent requirements
    76  // during upgrade-series:
    77  // 		copying the agent binary directory and renaming;
    78  // 		rewriting the machine and unit(s) systemd files if necessary;
    79  //		ensuring unit agents are started post-upgrade;
    80  //		moving the status of the upgrade-series steps along.
    81  type upgradeSeriesWorker struct {
    82  	Facade
    83  
    84  	facadeFactory   func(names.Tag) Facade
    85  	catacomb        catacomb.Catacomb
    86  	logger          Logger
    87  	service         ServiceAccess
    88  	upgraderFactory func(string) (Upgrader, error)
    89  
    90  	// Some local state retained for reporting purposes.
    91  	mu             sync.Mutex
    92  	machineStatus  model.UpgradeSeriesStatus
    93  	preparedUnits  []names.UnitTag
    94  	completedUnits []names.UnitTag
    95  
    96  	// Ensure that leaders are pinned only once if possible,
    97  	// on the first transition to UpgradeSeriesPrepareStarted.
    98  	// However repeated pin calls are not of too much concern,
    99  	// as the pin operations are idempotent.
   100  	leadersPinned bool
   101  }
   102  
   103  // NewWorker creates, starts and returns a new upgrade-series worker based on
   104  // the input configuration.
   105  func NewWorker(config Config) (worker.Worker, error) {
   106  	if err := config.Validate(); err != nil {
   107  		return nil, errors.Trace(err)
   108  	}
   109  
   110  	w := &upgradeSeriesWorker{
   111  		Facade:          config.FacadeFactory(config.Tag),
   112  		facadeFactory:   config.FacadeFactory,
   113  		logger:          config.Logger,
   114  		service:         config.Service,
   115  		upgraderFactory: config.UpgraderFactory,
   116  		machineStatus:   model.UpgradeSeriesNotStarted,
   117  		leadersPinned:   false,
   118  	}
   119  
   120  	if err := catacomb.Invoke(catacomb.Plan{
   121  		Site: &w.catacomb,
   122  		Work: w.loop,
   123  	}); err != nil {
   124  		return nil, errors.Trace(err)
   125  	}
   126  
   127  	return w, nil
   128  }
   129  
   130  func (w *upgradeSeriesWorker) loop() error {
   131  	uw, err := w.WatchUpgradeSeriesNotifications()
   132  	if err != nil {
   133  		return errors.Trace(err)
   134  	}
   135  	err = w.catacomb.Add(uw)
   136  	if err != nil {
   137  		return errors.Trace(err)
   138  	}
   139  	for {
   140  		select {
   141  		case <-w.catacomb.Dying():
   142  			return w.catacomb.ErrDying()
   143  		case <-uw.Changes():
   144  			if err := w.handleUpgradeSeriesChange(); err != nil {
   145  				return errors.Trace(err)
   146  			}
   147  		}
   148  	}
   149  }
   150  
   151  // handleUpgradeSeriesChange retrieves the current upgrade-series status for
   152  // this machine and based on the status, calls methods that will progress
   153  // the workflow accordingly.
   154  func (w *upgradeSeriesWorker) handleUpgradeSeriesChange() error {
   155  	w.mu.Lock()
   156  	defer w.mu.Unlock()
   157  
   158  	var err error
   159  	if w.machineStatus, err = w.MachineStatus(); err != nil {
   160  		if errors.IsNotFound(err) {
   161  			// No upgrade-series lock. This can happen when:
   162  			// - The first watch call is made.
   163  			// - The lock is removed after a completed upgrade.
   164  			w.logger.Infof("no series upgrade lock present")
   165  			w.machineStatus = model.UpgradeSeriesNotStarted
   166  			w.preparedUnits = nil
   167  			w.completedUnits = nil
   168  			return nil
   169  		}
   170  		return errors.Trace(err)
   171  	}
   172  	w.logger.Infof("machine series upgrade status is %q", w.machineStatus)
   173  
   174  	switch w.machineStatus {
   175  	case model.UpgradeSeriesPrepareStarted:
   176  		err = w.handlePrepareStarted()
   177  	case model.UpgradeSeriesCompleteStarted:
   178  		err = w.handleCompleteStarted()
   179  	case model.UpgradeSeriesCompleted:
   180  		err = w.handleCompleted()
   181  	}
   182  	return errors.Trace(err)
   183  }
   184  
   185  // handlePrepareStarted handles workflow for the machine with an upgrade-series
   186  // lock status of "UpgradeSeriesPrepareStarted"
   187  func (w *upgradeSeriesWorker) handlePrepareStarted() error {
   188  	var err error
   189  	if !w.leadersPinned {
   190  		if err = w.pinLeaders(); err != nil {
   191  			return errors.Trace(err)
   192  		}
   193  	}
   194  
   195  	if w.preparedUnits, err = w.UnitsPrepared(); err != nil {
   196  		return errors.Trace(err)
   197  	}
   198  
   199  	unitServices, allConfirmed, err := w.compareUnitAgentServices(w.preparedUnits)
   200  	if err != nil {
   201  		return errors.Trace(err)
   202  	}
   203  	if !allConfirmed {
   204  		w.logger.Debugf(
   205  			"waiting for units to complete series upgrade preparation; known unit agent services: %s",
   206  			unitNames(unitServices),
   207  		)
   208  		return nil
   209  	}
   210  
   211  	return errors.Trace(w.transitionPrepareComplete(unitServices))
   212  }
   213  
   214  // transitionPrepareComplete rewrites service unit files for unit agents running
   215  // on this machine so that they are compatible with the init system of the
   216  // series upgrade target.
   217  func (w *upgradeSeriesWorker) transitionPrepareComplete(unitServices map[string]string) error {
   218  	w.logger.Infof("preparing service units for series upgrade")
   219  	toSeries, err := w.TargetSeries()
   220  	if err != nil {
   221  		return errors.Trace(err)
   222  	}
   223  	upgrader, err := w.upgraderFactory(toSeries)
   224  	if err != nil {
   225  		return errors.Trace(err)
   226  	}
   227  	if err := upgrader.PerformUpgrade(); err != nil {
   228  		return errors.Trace(err)
   229  	}
   230  	return errors.Trace(w.SetMachineStatus(model.UpgradeSeriesPrepareCompleted,
   231  		"binaries and service files written"))
   232  }
   233  
   234  func (w *upgradeSeriesWorker) handleCompleteStarted() error {
   235  	var err error
   236  	if w.preparedUnits, err = w.UnitsPrepared(); err != nil {
   237  		return errors.Trace(err)
   238  	}
   239  
   240  	// If the units are still all in the "PrepareComplete" state, then the
   241  	// manual tasks have been run and an operator has executed the
   242  	// upgrade-series completion command; start all the unit agents,
   243  	// and progress the workflow.
   244  	unitServices, allConfirmed, err := w.compareUnitAgentServices(w.preparedUnits)
   245  	if err != nil {
   246  		return errors.Trace(err)
   247  	}
   248  	servicesPresent := len(unitServices) > 0
   249  
   250  	// allConfirmed returns true when there are no units, so we only need this
   251  	// transition when there are services to start.
   252  	// If there are none, just proceed to the completed stage.
   253  	if allConfirmed && servicesPresent {
   254  		return errors.Trace(w.transitionUnitsStarted(unitServices))
   255  	}
   256  
   257  	// If the units have all completed their workflow, then we are done.
   258  	// Make the final update to the lock to say the machine is completed.
   259  	if w.completedUnits, err = w.UnitsCompleted(); err != nil {
   260  		return errors.Trace(err)
   261  	}
   262  
   263  	unitServices, allConfirmed, err = w.compareUnitAgentServices(w.completedUnits)
   264  	if err != nil {
   265  		return errors.Trace(err)
   266  	}
   267  
   268  	if allConfirmed {
   269  		w.logger.Infof("series upgrade complete")
   270  		return errors.Trace(w.SetMachineStatus(model.UpgradeSeriesCompleted, "series upgrade complete"))
   271  	}
   272  
   273  	return nil
   274  }
   275  
   276  // transitionUnitsStarted iterates over units managed by this machine. Starts
   277  // the unit's agent service, and transitions all unit subordinate statuses.
   278  func (w *upgradeSeriesWorker) transitionUnitsStarted(unitServices map[string]string) error {
   279  	w.logger.Infof("ensuring units are up after series upgrade")
   280  
   281  	for unit, serviceName := range unitServices {
   282  		svc, err := w.service.DiscoverService(serviceName)
   283  		if err != nil {
   284  			return errors.Trace(err)
   285  		}
   286  		running, err := svc.Running()
   287  		if err != nil {
   288  			return errors.Trace(err)
   289  		}
   290  		if running {
   291  			continue
   292  		}
   293  		if err := svc.Start(); err != nil {
   294  			return errors.Annotatef(err, "starting %q unit agent after series upgrade", unit)
   295  		}
   296  	}
   297  
   298  	return errors.Trace(w.StartUnitCompletion("started unit agents after series upgrade"))
   299  }
   300  
   301  // handleCompleted notifies the server that it has completed the upgrade
   302  // workflow, then unpins leadership for applications running on the machine.
   303  func (w *upgradeSeriesWorker) handleCompleted() error {
   304  	s, err := hostSeries()
   305  	if err != nil {
   306  		return errors.Trace(err)
   307  	}
   308  	if err = w.FinishUpgradeSeries(s); err != nil {
   309  		return errors.Trace(err)
   310  	}
   311  	return errors.Trace(w.unpinLeaders())
   312  }
   313  
   314  // compareUnitsAgentServices filters the services running on the local machine
   315  // to those that are for unit agents.
   316  // The service names keyed by unit names are returned, along with a boolean
   317  // indicating whether all the input unit tags are represented in the
   318  // service map.
   319  // NOTE: No unit tags and no agent services returns true, meaning that the
   320  // workflow can progress.
   321  func (w *upgradeSeriesWorker) compareUnitAgentServices(units []names.UnitTag) (map[string]string, bool, error) {
   322  	unitServices, err := w.unitServices()
   323  	if err != nil {
   324  		return nil, false, errors.Trace(err)
   325  	}
   326  	if len(unitServices) == 0 {
   327  		w.logger.Debugf("no unit agent services found")
   328  	}
   329  	if len(units) != len(unitServices) {
   330  		return unitServices, false, nil
   331  	}
   332  
   333  	for _, u := range units {
   334  		if _, ok := unitServices[u.Id()]; !ok {
   335  			return unitServices, false, nil
   336  		}
   337  	}
   338  	return unitServices, true, nil
   339  }
   340  
   341  // pinLeaders pins leadership for applications
   342  // represented by units running on this machine.
   343  func (w *upgradeSeriesWorker) pinLeaders() (err error) {
   344  	// if we encounter an error,
   345  	// attempt to ensure that no application leaders remain pinned.
   346  	defer func() {
   347  		if err != nil {
   348  			if unpinErr := w.unpinLeaders(); unpinErr != nil {
   349  				err = errors.Wrap(err, unpinErr)
   350  			}
   351  		}
   352  	}()
   353  
   354  	results, err := w.PinMachineApplications()
   355  	if err != nil {
   356  		// If pin machine applications method return not implemented because it's
   357  		// utilising the legacy leases store, then we should display the warning
   358  		// in the log and return out. Unpinning leaders should be safe as that
   359  		// should be considered a no-op
   360  		if params.IsCodeNotImplemented(err) {
   361  			w.logger.Infof("failed to pin machine applications, with legacy lease manager leadership pinning is not implemented")
   362  			return nil
   363  		}
   364  		return errors.Trace(err)
   365  	}
   366  
   367  	var lastErr error
   368  	for app, err := range results {
   369  		if err == nil {
   370  			w.logger.Infof("unpin leader for application %q", app)
   371  			continue
   372  		}
   373  		w.logger.Errorf("failed to pin leader for application %q: %s", app, err.Error())
   374  		lastErr = err
   375  	}
   376  
   377  	if lastErr == nil {
   378  		w.leadersPinned = true
   379  		return nil
   380  	}
   381  	return errors.Trace(lastErr)
   382  }
   383  
   384  // unpinLeaders unpins leadership for applications
   385  // represented by units running on this machine.
   386  func (w *upgradeSeriesWorker) unpinLeaders() error {
   387  	results, err := w.UnpinMachineApplications()
   388  	if err != nil {
   389  		return errors.Trace(err)
   390  	}
   391  
   392  	var lastErr error
   393  	for app, err := range results {
   394  		if err == nil {
   395  			w.logger.Infof("unpinned leader for application %q", app)
   396  			continue
   397  		}
   398  		w.logger.Errorf("failed to unpin leader for application %q: %s", app, err.Error())
   399  		lastErr = err
   400  	}
   401  
   402  	if lastErr == nil {
   403  		w.leadersPinned = false
   404  		return nil
   405  	}
   406  	return errors.Trace(lastErr)
   407  }
   408  
   409  // Unit services returns a map of unit agent service names,
   410  // keyed on their unit IDs.
   411  func (w *upgradeSeriesWorker) unitServices() (map[string]string, error) {
   412  	services, err := w.service.ListServices()
   413  	if err != nil {
   414  		return nil, errors.Trace(err)
   415  	}
   416  	return service.FindUnitServiceNames(services), nil
   417  }
   418  
   419  // Report (worker.Reporter) generates a report for the Juju engine.
   420  func (w *upgradeSeriesWorker) Report() map[string]interface{} {
   421  	w.mu.Lock()
   422  	defer w.mu.Unlock()
   423  
   424  	report := map[string]interface{}{"machine status": w.machineStatus}
   425  
   426  	if len(w.preparedUnits) > 0 {
   427  		units := make([]string, len(w.preparedUnits))
   428  		for i, u := range w.preparedUnits {
   429  			units[i] = u.Id()
   430  		}
   431  		report["prepared units"] = units
   432  	}
   433  
   434  	if len(w.completedUnits) > 0 {
   435  		units := make([]string, len(w.completedUnits))
   436  		for i, u := range w.completedUnits {
   437  			units[i] = u.Id()
   438  		}
   439  		report["completed units"] = units
   440  	}
   441  
   442  	return report
   443  }
   444  
   445  // Kill implements worker.Worker.Kill.
   446  func (w *upgradeSeriesWorker) Kill() {
   447  	w.catacomb.Kill(nil)
   448  }
   449  
   450  // Wait implements worker.Worker.Wait.
   451  func (w *upgradeSeriesWorker) Wait() error {
   452  	return w.catacomb.Wait()
   453  }
   454  
   455  // Stop stops the upgrade-series worker and returns any
   456  // error it encountered when running.
   457  func (w *upgradeSeriesWorker) Stop() error {
   458  	w.Kill()
   459  	return w.Wait()
   460  }
   461  
   462  // unitNames returns a comma-delimited string of unit names based on the input
   463  // map of unit agent services.
   464  func unitNames(units map[string]string) string {
   465  	unitIds := make([]string, len(units))
   466  	i := 0
   467  	for u := range units {
   468  		unitIds[i] = u
   469  		i++
   470  	}
   471  	return strings.Join(unitIds, ", ")
   472  }