github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/upgrade.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  /*
     5  This file defines infrastructure for synchronising controller tools
     6  upgrades. Synchronisation is handled via a mongo DB document in the
     7  "upgradeInfo" collection.
     8  
     9  The functionality here is intended to be used as follows:
    10  
    11  1. When controllers come up running the new tools version, they call
    12  EnsureUpgradeInfo before running upgrade steps.
    13  
    14  2a. Any secondary controller watches the UpgradeInfo document and
    15  waits for the status to change to UpgradeFinishing.
    16  
    17  2b. The master controller watches the UpgradeInfo document and waits
    18  for AllProvisionedControllersReady to return true. This indicates
    19  that all provisioned controllers have called EnsureUpgradeInfo and
    20  are ready to upgrade.
    21  
    22  3. The master controller calls SetStatus with UpgradeRunning and
    23  runs its upgrade steps.
    24  
    25  4. The master controller calls SetStatus with UpgradeFinishing and
    26  then calls SetControllerDone with its own machine id.
    27  
    28  5. Secondary controllers, seeing that the status has changed to
    29  UpgradeFinishing, run their upgrade steps and then call
    30  SetControllerDone when complete.
    31  
    32  6. Once the final controller calls SetControllerDone, the status is
    33  changed to UpgradeComplete and the upgradeInfo document is archived.
    34  */
    35  
    36  package state
    37  
    38  import (
    39  	"time"
    40  
    41  	"github.com/juju/errors"
    42  	jujutxn "github.com/juju/txn"
    43  	"github.com/juju/utils/set"
    44  	"github.com/juju/version"
    45  	"gopkg.in/mgo.v2"
    46  	"gopkg.in/mgo.v2/bson"
    47  	"gopkg.in/mgo.v2/txn"
    48  )
    49  
// UpgradeStatus describes the states an upgrade operation may be in.
// It is stored as a plain string in the "status" field of the upgrade
// info document.
type UpgradeStatus string
    52  
const (
	// UpgradePending indicates that an upgrade is queued but not yet started.
	// It is the status EnsureUpgradeInfo gives a newly created upgrade info
	// document; SetStatus refuses to set it explicitly.
	UpgradePending UpgradeStatus = "pending"

	// UpgradeRunning indicates that the master controller has started
	// running upgrade logic, and other controllers are waiting for it.
	UpgradeRunning UpgradeStatus = "running"

	// UpgradeFinishing indicates that the master controller has finished
	// running upgrade logic, and other controllers are catching up.
	UpgradeFinishing UpgradeStatus = "finishing"

	// UpgradeComplete indicates that all controllers have finished running
	// upgrade logic. It is applied when SetControllerDone archives the
	// upgrade info document; SetStatus refuses to set it explicitly.
	UpgradeComplete UpgradeStatus = "complete"

	// UpgradeAborted indicates that the upgrade wasn't completed due
	// to some problem. It is applied when Abort archives the upgrade info
	// document; SetStatus refuses to set it explicitly.
	UpgradeAborted UpgradeStatus = "aborted"

	// currentUpgradeId is the mongo _id of the current upgrade info document.
	// Archived documents are stored under a different, generated id.
	currentUpgradeId = "current"
)
    76  
// upgradeInfoDoc is the document read from and written to the upgradeInfo
// collection.
//
// Id is currentUpgradeId for the upgrade in progress and a generated id once
// the document has been archived. PreviousVersion and TargetVersion are the
// versions being upgraded from and to. Started is set when the document is
// first created by EnsureUpgradeInfo. ControllersReady holds the machine ids
// added by EnsureUpgradeInfo, and ControllersDone the machine ids added by
// SetControllerDone.
type upgradeInfoDoc struct {
	Id               string         `bson:"_id"`
	PreviousVersion  version.Number `bson:"previousVersion"`
	TargetVersion    version.Number `bson:"targetVersion"`
	Status           UpgradeStatus  `bson:"status"`
	Started          time.Time      `bson:"started"`
	ControllersReady []string       `bson:"controllersReady"`
	ControllersDone  []string       `bson:"controllersDone"`
}
    86  
// UpgradeInfo is used to synchronise controller upgrades.
//
// It pairs a State with a local copy of the upgrade info document. The copy
// is only brought up to date with the database when Refresh is called.
type UpgradeInfo struct {
	st  *State
	doc upgradeInfoDoc
}
    92  
    93  // PreviousVersion returns the version being upgraded from.
    94  func (info *UpgradeInfo) PreviousVersion() version.Number {
    95  	return info.doc.PreviousVersion
    96  }
    97  
    98  // TargetVersion returns the version being upgraded to.
    99  func (info *UpgradeInfo) TargetVersion() version.Number {
   100  	return info.doc.TargetVersion
   101  }
   102  
   103  // Status returns the status of the upgrade.
   104  func (info *UpgradeInfo) Status() UpgradeStatus {
   105  	return info.doc.Status
   106  }
   107  
   108  // Started returns the time at which the upgrade was started.
   109  func (info *UpgradeInfo) Started() time.Time {
   110  	return info.doc.Started
   111  }
   112  
   113  // ControllersReady returns the machine ids for controllers that
   114  // have signalled that they are ready for upgrade.
   115  func (info *UpgradeInfo) ControllersReady() []string {
   116  	result := make([]string, len(info.doc.ControllersReady))
   117  	copy(result, info.doc.ControllersReady)
   118  	return result
   119  }
   120  
   121  // ControllersDone returns the machine ids for controllers that
   122  // have completed their upgrades.
   123  func (info *UpgradeInfo) ControllersDone() []string {
   124  	result := make([]string, len(info.doc.ControllersDone))
   125  	copy(result, info.doc.ControllersDone)
   126  	return result
   127  }
   128  
   129  // Refresh updates the contents of the UpgradeInfo from underlying state.
   130  func (info *UpgradeInfo) Refresh() error {
   131  	doc, err := currentUpgradeInfoDoc(info.st)
   132  	if err != nil {
   133  		return errors.Trace(err)
   134  	}
   135  	info.doc = *doc
   136  	return nil
   137  }
   138  
   139  // Watch returns a watcher for the state underlying the current
   140  // UpgradeInfo instance. This is provided purely for convenience.
   141  func (info *UpgradeInfo) Watch() NotifyWatcher {
   142  	return info.st.WatchUpgradeInfo()
   143  }
   144  
   145  // AllProvisionedControllersReady returns true if and only if all controllers
   146  // that have been started by the provisioner have called EnsureUpgradeInfo with
   147  // matching versions.
   148  //
   149  // When this returns true the master state controller can begin it's
   150  // own upgrade.
   151  func (info *UpgradeInfo) AllProvisionedControllersReady() (bool, error) {
   152  	provisioned, err := info.getProvisionedControllers()
   153  	if err != nil {
   154  		return false, errors.Trace(err)
   155  	}
   156  	ready := set.NewStrings(info.doc.ControllersReady...)
   157  	missing := set.NewStrings(provisioned...).Difference(ready)
   158  	return missing.IsEmpty(), nil
   159  }
   160  
   161  func (info *UpgradeInfo) getProvisionedControllers() ([]string, error) {
   162  	var provisioned []string
   163  
   164  	controllerInfo, err := info.st.ControllerInfo()
   165  	if err != nil {
   166  		return provisioned, errors.Annotate(err, "cannot read controllers")
   167  	}
   168  
   169  	upgradeDone, err := info.isModelUUIDUpgradeDone()
   170  	if err != nil {
   171  		return provisioned, errors.Trace(err)
   172  	}
   173  
   174  	// Extract current and provisioned controllers.
   175  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   176  	defer closer()
   177  
   178  	// If instanceData has the env UUID upgrade query using the
   179  	// machineid field, otherwise check using _id.
   180  	var sel bson.D
   181  	var field string
   182  	if upgradeDone {
   183  		sel = bson.D{{"model-uuid", info.st.ModelUUID()}}
   184  		field = "machineid"
   185  	} else {
   186  		field = "_id"
   187  	}
   188  	sel = append(sel, bson.DocElem{field, bson.D{{"$in", controllerInfo.MachineIds}}})
   189  	iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter()
   190  
   191  	var doc bson.M
   192  	for iter.Next(&doc) {
   193  		provisioned = append(provisioned, doc[field].(string))
   194  	}
   195  	if err := iter.Close(); err != nil {
   196  		return provisioned, errors.Annotate(err, "cannot read provisioned machines")
   197  	}
   198  	return provisioned, nil
   199  }
   200  
   201  func (info *UpgradeInfo) isModelUUIDUpgradeDone() (bool, error) {
   202  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   203  	defer closer()
   204  
   205  	query := instanceData.Find(bson.D{{"model-uuid", bson.D{{"$exists", true}}}})
   206  	n, err := query.Count()
   207  	if err != nil {
   208  		return false, errors.Annotatef(err, "couldn't query instance upgrade status")
   209  	}
   210  	return n > 0, nil
   211  }
   212  
   213  // SetStatus sets the status of the current upgrade. Checks are made
   214  // to ensure that status changes are performed in the correct order.
   215  func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error {
   216  	var assertSane bson.D
   217  	switch status {
   218  	case UpgradePending, UpgradeComplete, UpgradeAborted:
   219  		return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status)
   220  	case UpgradeRunning:
   221  		assertSane = bson.D{{"status", bson.D{{"$in",
   222  			[]UpgradeStatus{UpgradePending, UpgradeRunning},
   223  		}}}}
   224  	case UpgradeFinishing:
   225  		assertSane = bson.D{{"status", bson.D{{"$in",
   226  			[]UpgradeStatus{UpgradeRunning, UpgradeFinishing},
   227  		}}}}
   228  	default:
   229  		return errors.Errorf("unknown upgrade status: %s", status)
   230  	}
   231  	if info.doc.Id != currentUpgradeId {
   232  		return errors.New("cannot set status on non-current upgrade")
   233  	}
   234  
   235  	ops := []txn.Op{{
   236  		C:  upgradeInfoC,
   237  		Id: currentUpgradeId,
   238  		Assert: append(bson.D{{
   239  			"previousVersion", info.doc.PreviousVersion,
   240  		}, {
   241  			"targetVersion", info.doc.TargetVersion,
   242  		}}, assertSane...),
   243  		Update: bson.D{{"$set", bson.D{{"status", status}}}},
   244  	}}
   245  	err := info.st.runTransaction(ops)
   246  	if err == txn.ErrAborted {
   247  		return errors.Errorf("cannot set upgrade status to %q: Another "+
   248  			"status change may have occurred concurrently", status)
   249  	}
   250  	return errors.Annotate(err, "cannot set upgrade status")
   251  }
   252  
   253  // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the
   254  // supplied versions. If a matching upgrade is in progress, that upgrade is returned;
   255  // if there's a mismatch, an error is returned. The supplied machine id must correspond
   256  // to a current controller.
   257  func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   258  
   259  	assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion)
   260  	if err != nil {
   261  		return nil, errors.Trace(err)
   262  	}
   263  
   264  	doc := upgradeInfoDoc{
   265  		Id:              currentUpgradeId,
   266  		PreviousVersion: previousVersion,
   267  		TargetVersion:   targetVersion,
   268  		Status:          UpgradePending,
   269  		// TODO(fwereade): 2016-03-17 lp:1558657
   270  		Started:          time.Now().UTC(),
   271  		ControllersReady: []string{machineId},
   272  	}
   273  
   274  	machine, err := st.Machine(machineId)
   275  	if err != nil {
   276  		return nil, errors.Trace(err)
   277  	}
   278  
   279  	ops := []txn.Op{{
   280  		C:      upgradeInfoC,
   281  		Id:     currentUpgradeId,
   282  		Assert: txn.DocMissing,
   283  		Insert: doc,
   284  	}, {
   285  		C:      instanceDataC,
   286  		Id:     machine.doc.DocID,
   287  		Assert: txn.DocExists,
   288  	}}
   289  	if err := st.runRawTransaction(ops); err == nil {
   290  		return &UpgradeInfo{st: st, doc: doc}, nil
   291  	} else if err != txn.ErrAborted {
   292  		return nil, errors.Annotate(err, "cannot create upgrade info")
   293  	}
   294  
   295  	if provisioned, err := st.isMachineProvisioned(machineId); err != nil {
   296  		return nil, errors.Trace(err)
   297  	} else if !provisioned {
   298  		return nil, errors.Errorf(
   299  			"machine %s is not provisioned and should not be participating in upgrades",
   300  			machineId)
   301  	}
   302  
   303  	if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil {
   304  		return info, nil
   305  	} else if errors.Cause(err) != errUpgradeInfoNotUpdated {
   306  		return nil, errors.Trace(err)
   307  	}
   308  
   309  	ops = []txn.Op{{
   310  		C:      upgradeInfoC,
   311  		Id:     currentUpgradeId,
   312  		Assert: assertSanity,
   313  		Update: bson.D{{
   314  			"$addToSet", bson.D{{"controllersReady", machineId}},
   315  		}},
   316  	}}
   317  	switch err := st.runTransaction(ops); err {
   318  	case nil:
   319  		return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion)
   320  	case txn.ErrAborted:
   321  		return nil, errors.New("upgrade info changed during update")
   322  	}
   323  	return nil, errors.Annotate(err, "cannot update upgrade info")
   324  }
   325  
   326  func (st *State) isMachineProvisioned(machineId string) (bool, error) {
   327  	instanceData, closer := st.getRawCollection(instanceDataC)
   328  	defer closer()
   329  
   330  	for _, id := range []string{st.docID(machineId), machineId} {
   331  		count, err := instanceData.FindId(id).Count()
   332  		if err != nil {
   333  			return false, errors.Annotate(err, "cannot read instance data")
   334  		}
   335  		if count > 0 {
   336  			return true, nil
   337  		}
   338  	}
   339  	return false, nil
   340  }
   341  
// errUpgradeInfoNotUpdated is the sentinel returned (traced) by
// ensureUpgradeInfoUpdated when the current upgrade info document matches the
// expected versions but does not yet list the machine as ready. Callers
// detect it with errors.Cause.
var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated")
   343  
   344  func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   345  	var doc upgradeInfoDoc
   346  	if pdoc, err := currentUpgradeInfoDoc(st); err != nil {
   347  		return nil, errors.Trace(err)
   348  	} else {
   349  		doc = *pdoc
   350  	}
   351  
   352  	if doc.PreviousVersion != previousVersion {
   353  		return nil, errors.Errorf(
   354  			"current upgrade info mismatch: expected previous version %s, got %s",
   355  			previousVersion, doc.PreviousVersion)
   356  	}
   357  	if doc.TargetVersion != targetVersion {
   358  		return nil, errors.Errorf(
   359  			"current upgrade info mismatch: expected target version %s, got %s",
   360  			targetVersion, doc.TargetVersion)
   361  	}
   362  
   363  	controllersReady := set.NewStrings(doc.ControllersReady...)
   364  	if !controllersReady.Contains(machineId) {
   365  		return nil, errors.Trace(errUpgradeInfoNotUpdated)
   366  	}
   367  	return &UpgradeInfo{st: st, doc: doc}, nil
   368  }
   369  
   370  // SetControllerDone marks the supplied state machineId as having
   371  // completed its upgrades. When SetControllerDone is called by the
   372  // last provisioned controller, the current upgrade info document
   373  // will be archived with a status of UpgradeComplete.
   374  func (info *UpgradeInfo) SetControllerDone(machineId string) error {
   375  	assertSanity, err := checkUpgradeInfoSanity(info.st, machineId,
   376  		info.doc.PreviousVersion, info.doc.TargetVersion)
   377  	if err != nil {
   378  		return errors.Trace(err)
   379  	}
   380  
   381  	buildTxn := func(attempt int) ([]txn.Op, error) {
   382  		doc, err := currentUpgradeInfoDoc(info.st)
   383  		if errors.IsNotFound(err) {
   384  			return nil, jujutxn.ErrNoOperations
   385  		} else if err != nil {
   386  			return nil, errors.Trace(err)
   387  		}
   388  		switch doc.Status {
   389  		case UpgradePending, UpgradeRunning:
   390  			return nil, errors.New("upgrade has not yet run")
   391  		}
   392  
   393  		controllersDone := set.NewStrings(doc.ControllersDone...)
   394  		if controllersDone.Contains(machineId) {
   395  			return nil, jujutxn.ErrNoOperations
   396  		}
   397  		controllersDone.Add(machineId)
   398  
   399  		controllersReady := set.NewStrings(doc.ControllersReady...)
   400  		controllersNotDone := controllersReady.Difference(controllersDone)
   401  		if controllersNotDone.IsEmpty() {
   402  			// This is the last controller. Archive the current
   403  			// upgradeInfo document.
   404  			doc.ControllersDone = controllersDone.SortedValues()
   405  			return info.makeArchiveOps(doc, UpgradeComplete), nil
   406  		}
   407  
   408  		return []txn.Op{{
   409  			C:  upgradeInfoC,
   410  			Id: currentUpgradeId,
   411  			// This is not the last controller, but we need to be
   412  			// sure it still isn't when we run this.
   413  			Assert: append(assertSanity, bson.D{{
   414  				"controllersDone", bson.D{{"$nin", controllersNotDone.Values()}},
   415  			}}...),
   416  			Update: bson.D{{"$addToSet", bson.D{{"controllersDone", machineId}}}},
   417  		}}, nil
   418  	}
   419  	err = info.st.run(buildTxn)
   420  	return errors.Annotate(err, "cannot complete upgrade")
   421  }
   422  
   423  // Abort marks the current upgrade as aborted. It should be called if
   424  // the upgrade can't be completed for some reason.
   425  func (info *UpgradeInfo) Abort() error {
   426  	buildTxn := func(attempt int) ([]txn.Op, error) {
   427  		doc, err := currentUpgradeInfoDoc(info.st)
   428  		if errors.IsNotFound(err) {
   429  			return nil, jujutxn.ErrNoOperations
   430  		} else if err != nil {
   431  			return nil, errors.Trace(err)
   432  		}
   433  		return info.makeArchiveOps(doc, UpgradeAborted), nil
   434  	}
   435  	err := info.st.run(buildTxn)
   436  	return errors.Annotate(err, "cannot abort upgrade")
   437  }
   438  
   439  func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op {
   440  	doc.Status = status
   441  	doc.Id = bson.NewObjectId().String() // change id to archive value
   442  	return []txn.Op{{
   443  		C:      upgradeInfoC,
   444  		Id:     currentUpgradeId,
   445  		Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion),
   446  		Remove: true,
   447  	}, {
   448  		C:      upgradeInfoC,
   449  		Id:     doc.Id,
   450  		Assert: txn.DocMissing,
   451  		Insert: doc,
   452  	}}
   453  }
   454  
   455  // IsUpgrading returns true if an upgrade is currently in progress.
   456  func (st *State) IsUpgrading() (bool, error) {
   457  	doc, err := currentUpgradeInfoDoc(st)
   458  	if doc != nil && err == nil {
   459  		return true, nil
   460  	} else if errors.IsNotFound(err) {
   461  		return false, nil
   462  	} else {
   463  		return false, errors.Trace(err)
   464  	}
   465  }
   466  
   467  // AbortCurrentUpgrade archives any current UpgradeInfo and sets its
   468  // status to UpgradeAborted. Nothing happens if there's no current
   469  // UpgradeInfo.
   470  func (st *State) AbortCurrentUpgrade() error {
   471  	doc, err := currentUpgradeInfoDoc(st)
   472  	if err != nil {
   473  		if errors.IsNotFound(err) {
   474  			return nil
   475  		}
   476  		return errors.Trace(err)
   477  	}
   478  	info := &UpgradeInfo{st: st, doc: *doc}
   479  	return errors.Trace(info.Abort())
   480  
   481  }
   482  
   483  func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) {
   484  	var doc upgradeInfoDoc
   485  	upgradeInfo, closer := st.getCollection(upgradeInfoC)
   486  	defer closer()
   487  	if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound {
   488  		return nil, errors.NotFoundf("current upgrade info")
   489  	} else if err != nil {
   490  		return nil, errors.Annotate(err, "cannot read upgrade info")
   491  	}
   492  	return &doc, nil
   493  }
   494  
   495  func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) {
   496  	if previousVersion.Compare(targetVersion) != -1 {
   497  		return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion)
   498  	}
   499  	controllerInfo, err := st.ControllerInfo()
   500  	if err != nil {
   501  		return nil, errors.Annotate(err, "cannot read controllers")
   502  	}
   503  	validIds := set.NewStrings(controllerInfo.MachineIds...)
   504  	if !validIds.Contains(machineId) {
   505  		return nil, errors.Errorf("machine %q is not a controller", machineId)
   506  	}
   507  	return assertExpectedVersions(previousVersion, targetVersion), nil
   508  }
   509  
   510  func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D {
   511  	return bson.D{{
   512  		"previousVersion", previousVersion,
   513  	}, {
   514  		"targetVersion", targetVersion,
   515  	}}
   516  }
   517  
   518  // ClearUpgradeInfo clears information about an upgrade in progress. It returns
   519  // an error if no upgrade is current.
   520  func (st *State) ClearUpgradeInfo() error {
   521  	ops := []txn.Op{{
   522  		C:      upgradeInfoC,
   523  		Id:     currentUpgradeId,
   524  		Assert: txn.DocExists,
   525  		Remove: true,
   526  	}}
   527  	err := st.runTransaction(ops)
   528  	return errors.Annotate(err, "cannot clear upgrade info")
   529  }