github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/upgrader/upgrader.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package upgrader
     5  
     6  import (
     7  	"fmt"
     8  	"net/http"
     9  	"os"
    10  	"time"
    11  
    12  	"github.com/juju/errors"
    13  	"github.com/juju/loggo"
    14  	"github.com/juju/os/series"
    15  	"github.com/juju/utils"
    16  	"github.com/juju/utils/arch"
    17  	"github.com/juju/version"
    18  	"gopkg.in/juju/names.v2"
    19  	"gopkg.in/juju/worker.v1/catacomb"
    20  
    21  	"github.com/juju/juju/agent"
    22  	agenttools "github.com/juju/juju/agent/tools"
    23  	"github.com/juju/juju/api/upgrader"
    24  	coretools "github.com/juju/juju/tools"
    25  	"github.com/juju/juju/upgrades"
    26  	jujuversion "github.com/juju/juju/version"
    27  	"github.com/juju/juju/worker/gate"
    28  )
    29  
const (
	// shortDelay is the time we normally sleep for in the main loop
	// before retrying a failed download of the agent binaries.
	shortDelay = 5 * time.Second

	// notEnoughSpaceDelay is how long we sleep when there's a new
	// version of the agent that we need to download but there isn't
	// enough available space to download and unpack it. Sleeping
	// longer in that situation means we don't spam the log with disk
	// space errors every shortDelay (5 seconds), but still bring the
	// message up regularly.
	notEnoughSpaceDelay = time.Minute
)
    43  
    44  // retryAfter returns a channel that receives a value
    45  // when a failed download should be retried.
    46  var retryAfter = func(duration time.Duration) <-chan time.Time {
    47  	// TODO(fwereade): 2016-03-17 lp:1558657
    48  	return time.After(duration)
    49  }
    50  
// logger is the package-level logger, registered under the name
// "juju.worker.upgrader".
var logger = loggo.GetLogger("juju.worker.upgrader")
    52  
// Upgrader represents a worker that watches the state for upgrade
// requests.
type Upgrader struct {
	catacomb catacomb.Catacomb // runs loop and backs Kill and Wait
	st       *upgrader.State   // API client used to report and query agent versions
	dataDir  string            // AgentConfig.DataDir(); where agent binaries are read and unpacked
	tag      names.Tag         // AgentConfig.Tag(); identifies this agent in API calls
	config   Config            // the full configuration the worker was started with
}
    62  
// Config contains the items the worker needs to start.
//
// As used in this file:
//   - State is the API client used to set and query agent versions
//     and to look up the agent binaries to download.
//   - AgentConfig supplies the agent's tag and data directory.
//   - OrigAgentVersion is the version that may always be returned to
//     while upgrade steps are still running (see allowedTargetVersion).
//   - UpgradeStepsWaiter is treated as "upgrade steps are running"
//     for as long as it is not unlocked.
//   - InitialUpgradeCheckComplete is unlocked once the worker has
//     decided that no upgrade is needed (or that one is refused).
//   - CheckDiskSpace is called with a directory and
//     upgrades.MinDiskSpaceMib before each download attempt; a non-nil
//     error postpones the download.
type Config struct {
	State                       *upgrader.State
	AgentConfig                 agent.Config
	OrigAgentVersion            version.Number
	UpgradeStepsWaiter          gate.Waiter
	InitialUpgradeCheckComplete gate.Unlocker
	CheckDiskSpace              func(string, uint64) error
}
    72  
    73  // NewAgentUpgrader returns a new upgrader worker. It watches changes to the
    74  // current version of the current agent (with the given tag) and tries to
    75  // download the tools for any new version into the given data directory.  If
    76  // an upgrade is needed, the worker will exit with an UpgradeReadyError
    77  // holding details of the requested upgrade. The tools will have been
    78  // downloaded and unpacked.
    79  func NewAgentUpgrader(config Config) (*Upgrader, error) {
    80  	u := &Upgrader{
    81  		st:      config.State,
    82  		dataDir: config.AgentConfig.DataDir(),
    83  		tag:     config.AgentConfig.Tag(),
    84  		config:  config,
    85  	}
    86  	err := catacomb.Invoke(catacomb.Plan{
    87  		Site: &u.catacomb,
    88  		Work: u.loop,
    89  	})
    90  	if err != nil {
    91  		return nil, errors.Trace(err)
    92  	}
    93  	return u, nil
    94  }
    95  
    96  // Kill implements worker.Worker.Kill.
    97  func (u *Upgrader) Kill() {
    98  	u.catacomb.Kill(nil)
    99  }
   100  
   101  // Wait implements worker.Worker.Wait.
   102  func (u *Upgrader) Wait() error {
   103  	return u.catacomb.Wait()
   104  }
   105  
   106  // Stop stops the upgrader and returns any
   107  // error it encountered when running.
   108  func (u *Upgrader) Stop() error {
   109  	u.Kill()
   110  	return u.Wait()
   111  }
   112  
   113  // allowedTargetVersion checks if targetVersion is too different from
   114  // curVersion to allow a downgrade.
   115  func allowedTargetVersion(
   116  	origAgentVersion version.Number,
   117  	curVersion version.Number,
   118  	upgradeStepsRunning bool,
   119  	targetVersion version.Number,
   120  ) bool {
   121  	if upgradeStepsRunning && targetVersion == origAgentVersion {
   122  		return true
   123  	}
   124  	if targetVersion.Major < curVersion.Major {
   125  		return false
   126  	}
   127  	if targetVersion.Major == curVersion.Major && targetVersion.Minor < curVersion.Minor {
   128  		return false
   129  	}
   130  	return true
   131  }
   132  
   133  func (u *Upgrader) loop() error {
   134  	// Start by reporting current tools (which includes arch/series, and is
   135  	// used by the controller in communicating the desired version below).
   136  	if err := u.st.SetVersion(u.tag.String(), toBinaryVersion(jujuversion.Current)); err != nil {
   137  		return errors.Annotate(err, "cannot set agent version")
   138  	}
   139  
   140  	// We don't read on the dying channel until we have received the
   141  	// initial event from the API version watcher, thus ensuring
   142  	// that we attempt an upgrade even if other workers are dying
   143  	// all around us. Similarly, we don't want to bind the watcher
   144  	// to the catacomb's lifetime (yet!) lest we wait forever for a
   145  	// stopped watcher.
   146  	//
   147  	// However, that absolutely depends on versionWatcher's guaranteed
   148  	// initial event, and we should assume that it'll break its contract
   149  	// sometime. So we allow the watcher to wait patiently for the event
   150  	// for a full minute; but after that we proceed regardless.
   151  	versionWatcher, err := u.st.WatchAPIVersion(u.tag.String())
   152  	if err != nil {
   153  		return errors.Trace(err)
   154  	}
   155  	logger.Infof("abort check blocked until version event received")
   156  	// TODO(fwereade): 2016-03-17 lp:1558657
   157  	mustProceed := time.After(time.Minute)
   158  	var dying <-chan struct{}
   159  	allowDying := func() {
   160  		if dying == nil {
   161  			logger.Infof("unblocking abort check")
   162  			mustProceed = nil
   163  			dying = u.catacomb.Dying()
   164  			if err := u.catacomb.Add(versionWatcher); err != nil {
   165  				u.catacomb.Kill(err)
   166  			}
   167  		}
   168  	}
   169  
   170  	var retry <-chan time.Time
   171  	for {
   172  		select {
   173  		// NOTE: retry and dying both start out nil, so they can't be chosen
   174  		// first time round the loop. However...
   175  		case <-retry:
   176  		case <-dying:
   177  			return u.catacomb.ErrDying()
   178  		// ...*every* other case *must* allowDying(), before doing anything
   179  		// else, lest an error cause us to leak versionWatcher.
   180  		case <-mustProceed:
   181  			logger.Infof("version event not received after one minute")
   182  			allowDying()
   183  		case _, ok := <-versionWatcher.Changes():
   184  			allowDying()
   185  			if !ok {
   186  				return errors.New("version watcher closed")
   187  			}
   188  		}
   189  
   190  		wantVersion, err := u.st.DesiredVersion(u.tag.String())
   191  		if err != nil {
   192  			return err
   193  		}
   194  		logger.Infof("desired agent binary version: %v", wantVersion)
   195  
   196  		if wantVersion == jujuversion.Current {
   197  			u.config.InitialUpgradeCheckComplete.Unlock()
   198  			continue
   199  		} else if !allowedTargetVersion(
   200  			u.config.OrigAgentVersion,
   201  			jujuversion.Current,
   202  			!u.config.UpgradeStepsWaiter.IsUnlocked(),
   203  			wantVersion,
   204  		) {
   205  			// See also bug #1299802 where when upgrading from
   206  			// 1.16 to 1.18 there is a race condition that can
   207  			// cause the unit agent to upgrade, and then want to
   208  			// downgrade when its associate machine agent has not
   209  			// finished upgrading.
   210  			logger.Infof("desired agent binary version: %s is older than current %s, refusing to downgrade",
   211  				wantVersion, jujuversion.Current)
   212  			u.config.InitialUpgradeCheckComplete.Unlock()
   213  			continue
   214  		}
   215  		logger.Infof("upgrade requested from %v to %v", jujuversion.Current, wantVersion)
   216  
   217  		// Check if tools have already been downloaded.
   218  		wantVersionBinary := toBinaryVersion(wantVersion)
   219  		if u.toolsAlreadyDownloaded(wantVersionBinary) {
   220  			return u.newUpgradeReadyError(wantVersionBinary)
   221  		}
   222  
   223  		// Check if tools are available for download.
   224  		wantToolsList, err := u.st.Tools(u.tag.String())
   225  		if err != nil {
   226  			// Not being able to lookup Tools is considered fatal
   227  			return err
   228  		}
   229  		// The worker cannot be stopped while we're downloading
   230  		// the tools - this means that even if the API is going down
   231  		// repeatedly (causing the agent to be stopped), as long
   232  		// as we have got as far as this, we will still be able to
   233  		// upgrade the agent.
   234  		delay := shortDelay
   235  		for _, wantTools := range wantToolsList {
   236  			if err := u.checkForSpace(); err != nil {
   237  				logger.Errorf("%s", err.Error())
   238  				delay = notEnoughSpaceDelay
   239  				break
   240  			}
   241  			err = u.ensureTools(wantTools)
   242  			if err == nil {
   243  				return u.newUpgradeReadyError(wantTools.Version)
   244  			}
   245  			logger.Errorf("failed to fetch agent binaries from %q: %v", wantTools.URL, err)
   246  		}
   247  		retry = retryAfter(delay)
   248  	}
   249  }
   250  
   251  func toBinaryVersion(vers version.Number) version.Binary {
   252  	outVers := version.Binary{
   253  		Number: vers,
   254  		Arch:   arch.HostArch(),
   255  		Series: series.MustHostSeries(),
   256  	}
   257  	return outVers
   258  }
   259  
   260  func (u *Upgrader) toolsAlreadyDownloaded(wantVersion version.Binary) bool {
   261  	_, err := agenttools.ReadTools(u.dataDir, wantVersion)
   262  	return err == nil
   263  }
   264  
   265  func (u *Upgrader) newUpgradeReadyError(newVersion version.Binary) *UpgradeReadyError {
   266  	return &UpgradeReadyError{
   267  		OldTools:  toBinaryVersion(jujuversion.Current),
   268  		NewTools:  newVersion,
   269  		AgentName: u.tag.String(),
   270  		DataDir:   u.dataDir,
   271  	}
   272  }
   273  
   274  func (u *Upgrader) ensureTools(agentTools *coretools.Tools) error {
   275  	logger.Infof("fetching agent binaries from %q", agentTools.URL)
   276  	// The reader MUST verify the tools' hash, so there is no
   277  	// need to validate the peer. We cannot anyway: see http://pad.lv/1261780.
   278  	resp, err := utils.GetNonValidatingHTTPClient().Get(agentTools.URL)
   279  	if err != nil {
   280  		return err
   281  	}
   282  	defer resp.Body.Close()
   283  	if resp.StatusCode != http.StatusOK {
   284  		return fmt.Errorf("bad HTTP response: %v", resp.Status)
   285  	}
   286  	err = agenttools.UnpackTools(u.dataDir, agentTools, resp.Body)
   287  	if err != nil {
   288  		return fmt.Errorf("cannot unpack agent binaries: %v", err)
   289  	}
   290  	logger.Infof("unpacked agent binaries %s to %s", agentTools.Version, u.dataDir)
   291  	return nil
   292  }
   293  
   294  func (u *Upgrader) checkForSpace() error {
   295  	logger.Debugf("checking available space before downloading")
   296  	err := u.config.CheckDiskSpace(u.dataDir, upgrades.MinDiskSpaceMib)
   297  	if err != nil {
   298  		return errors.Trace(err)
   299  	}
   300  	err = u.config.CheckDiskSpace(os.TempDir(), upgrades.MinDiskSpaceMib)
   301  	if err != nil {
   302  		return errors.Trace(err)
   303  	}
   304  	return nil
   305  }