github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/upgrade.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  /*
     5  This file defines infrastructure for synchronising controller tools
     6  upgrades. Synchronisation is handled via a mongo DB document in the
     7  "upgradeInfo" collection.
     8  
     9  The functionality here is intended to be used as follows:
    10  
    11  1. When controllers come up running the new tools version, they call
    12  EnsureUpgradeInfo before running upgrade steps.
    13  
    14  2a. Any secondary controller watches the UpgradeInfo document and
    15  waits for the status to change to UpgradeFinishing.
    16  
    17  2b. The master controller watches the UpgradeInfo document and waits
    18  for AllProvisionedControllersReady to return true. This indicates
    19  that all provisioned controllers have called EnsureUpgradeInfo and
    20  are ready to upgrade.
    21  
    22  3. The master controller calls SetStatus with UpgradeRunning and
    23  runs its upgrade steps.
    24  
    25  4. The master controller calls SetStatus with UpgradeFinishing and
    26  then calls SetControllerDone with its own machine id.
    27  
    28  5. Secondary controllers, seeing that the status has changed to
    29  UpgradeFinishing, run their upgrade steps and then call
    30  SetControllerDone when complete.
    31  
    32  6. Once the final controller calls SetControllerDone, the status is
    33  changed to UpgradeComplete and the upgradeInfo document is archived.
    34  */
    35  
    36  package state
    37  
    38  import (
    39  	"time"
    40  
    41  	"github.com/juju/errors"
    42  	jujutxn "github.com/juju/txn"
    43  	"github.com/juju/utils/set"
    44  	"github.com/juju/version"
    45  	"gopkg.in/mgo.v2"
    46  	"gopkg.in/mgo.v2/bson"
    47  	"gopkg.in/mgo.v2/txn"
    48  )
    49  
// UpgradeStatus describes the states an upgrade operation may be in.
// It is persisted as the "status" field of the upgrade info document.
type UpgradeStatus string

const (
	// UpgradePending indicates that an upgrade is queued but not yet started.
	// It is the status given to a newly created upgrade info document and
	// cannot be set explicitly through SetStatus.
	UpgradePending UpgradeStatus = "pending"

	// UpgradeRunning indicates that the master controller has started
	// running upgrade logic, and other controllers are waiting for it.
	// SetStatus only accepts it while the status is pending or running.
	UpgradeRunning UpgradeStatus = "running"

	// UpgradeFinishing indicates that the master controller has finished
	// running upgrade logic, and other controllers are catching up.
	// SetStatus only accepts it while the status is running or finishing.
	UpgradeFinishing UpgradeStatus = "finishing"

	// UpgradeComplete indicates that all controllers have finished running
	// upgrade logic. It is applied when the document is archived by
	// SetControllerDone and cannot be set explicitly through SetStatus.
	UpgradeComplete UpgradeStatus = "complete"

	// UpgradeAborted indicates that the upgrade wasn't completed due
	// to some problem. It is applied when the document is archived by
	// Abort and cannot be set explicitly through SetStatus.
	UpgradeAborted UpgradeStatus = "aborted"

	// currentUpgradeId is the mongo _id of the current upgrade info document.
	// Archived documents are stored under a different, generated id.
	currentUpgradeId = "current"
)
    76  
// upgradeInfoDoc is the persisted representation of an upgrade, stored in
// the upgradeInfoC collection. The in-progress upgrade always uses the id
// currentUpgradeId; when an upgrade is archived the document is re-inserted
// under a freshly generated id.
//
// ControllersReady and ControllersDone hold the machine ids of controllers
// that have, respectively, registered for the upgrade and finished it.
type upgradeInfoDoc struct {
	Id               string         `bson:"_id"`
	PreviousVersion  version.Number `bson:"previousVersion"`
	TargetVersion    version.Number `bson:"targetVersion"`
	Status           UpgradeStatus  `bson:"status"`
	Started          time.Time      `bson:"started"`
	ControllersReady []string       `bson:"controllersReady"`
	ControllersDone  []string       `bson:"controllersDone"`
}
    86  
// UpgradeInfo is used to synchronise controller upgrades.
//
// It pairs a State with a local copy of an upgrade info document. The
// accessor methods read that copy only; call Refresh to reload it from
// the database.
type UpgradeInfo struct {
	st  *State
	doc upgradeInfoDoc
}
    92  
// PreviousVersion returns the version being upgraded from, as held in the
// local copy of the upgrade info document.
func (info *UpgradeInfo) PreviousVersion() version.Number {
	return info.doc.PreviousVersion
}
    97  
// TargetVersion returns the version being upgraded to, as held in the
// local copy of the upgrade info document.
func (info *UpgradeInfo) TargetVersion() version.Number {
	return info.doc.TargetVersion
}
   102  
// Status returns the status of the upgrade as of the last time the local
// copy of the document was loaded; it does not query the database.
func (info *UpgradeInfo) Status() UpgradeStatus {
	return info.doc.Status
}
   107  
// Started returns the time at which the upgrade was started, i.e. the
// timestamp recorded in the upgrade info document.
func (info *UpgradeInfo) Started() time.Time {
	return info.doc.Started
}
   112  
   113  // ControllersReady returns the machine ids for controllers that
   114  // have signalled that they are ready for upgrade.
   115  func (info *UpgradeInfo) ControllersReady() []string {
   116  	result := make([]string, len(info.doc.ControllersReady))
   117  	copy(result, info.doc.ControllersReady)
   118  	return result
   119  }
   120  
   121  // ControllersDone returns the machine ids for controllers that
   122  // have completed their upgrades.
   123  func (info *UpgradeInfo) ControllersDone() []string {
   124  	result := make([]string, len(info.doc.ControllersDone))
   125  	copy(result, info.doc.ControllersDone)
   126  	return result
   127  }
   128  
   129  // Refresh updates the contents of the UpgradeInfo from underlying state.
   130  func (info *UpgradeInfo) Refresh() error {
   131  	doc, err := currentUpgradeInfoDoc(info.st)
   132  	if err != nil {
   133  		return errors.Trace(err)
   134  	}
   135  	info.doc = *doc
   136  	return nil
   137  }
   138  
// Watch returns a watcher for the state underlying the current
// UpgradeInfo instance. This is provided purely for convenience: it
// simply delegates to State.WatchUpgradeInfo.
func (info *UpgradeInfo) Watch() NotifyWatcher {
	return info.st.WatchUpgradeInfo()
}
   144  
   145  // AllProvisionedControllersReady returns true if and only if all controllers
   146  // that have been started by the provisioner have called EnsureUpgradeInfo with
   147  // matching versions.
   148  //
   149  // When this returns true the master state controller can begin it's
   150  // own upgrade.
   151  func (info *UpgradeInfo) AllProvisionedControllersReady() (bool, error) {
   152  	provisioned, err := info.getProvisionedControllers()
   153  	if err != nil {
   154  		return false, errors.Trace(err)
   155  	}
   156  	ready := set.NewStrings(info.doc.ControllersReady...)
   157  	missing := set.NewStrings(provisioned...).Difference(ready)
   158  	return missing.IsEmpty(), nil
   159  }
   160  
   161  func (info *UpgradeInfo) getProvisionedControllers() ([]string, error) {
   162  	var provisioned []string
   163  
   164  	controllerInfo, err := info.st.ControllerInfo()
   165  	if err != nil {
   166  		return provisioned, errors.Annotate(err, "cannot read controllers")
   167  	}
   168  
   169  	upgradeDone, err := info.isModelUUIDUpgradeDone()
   170  	if err != nil {
   171  		return provisioned, errors.Trace(err)
   172  	}
   173  
   174  	// Extract current and provisioned controllers.
   175  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   176  	defer closer()
   177  
   178  	// If instanceData has the env UUID upgrade query using the
   179  	// machineid field, otherwise check using _id.
   180  	var sel bson.D
   181  	var field string
   182  	if upgradeDone {
   183  		sel = bson.D{{"model-uuid", info.st.ModelUUID()}}
   184  		field = "machineid"
   185  	} else {
   186  		field = "_id"
   187  	}
   188  	sel = append(sel, bson.DocElem{field, bson.D{{"$in", controllerInfo.MachineIds}}})
   189  	iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter()
   190  
   191  	var doc bson.M
   192  	for iter.Next(&doc) {
   193  		provisioned = append(provisioned, doc[field].(string))
   194  	}
   195  	if err := iter.Close(); err != nil {
   196  		return provisioned, errors.Annotate(err, "cannot read provisioned machines")
   197  	}
   198  	return provisioned, nil
   199  }
   200  
   201  func (info *UpgradeInfo) isModelUUIDUpgradeDone() (bool, error) {
   202  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   203  	defer closer()
   204  
   205  	query := instanceData.Find(bson.D{{"model-uuid", bson.D{{"$exists", true}}}})
   206  	n, err := query.Count()
   207  	if err != nil {
   208  		return false, errors.Annotatef(err, "couldn't query instance upgrade status")
   209  	}
   210  	return n > 0, nil
   211  }
   212  
   213  // SetStatus sets the status of the current upgrade. Checks are made
   214  // to ensure that status changes are performed in the correct order.
   215  func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error {
   216  	var assertSane bson.D
   217  	switch status {
   218  	case UpgradePending, UpgradeComplete, UpgradeAborted:
   219  		return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status)
   220  	case UpgradeRunning:
   221  		assertSane = bson.D{{"status", bson.D{{"$in",
   222  			[]UpgradeStatus{UpgradePending, UpgradeRunning},
   223  		}}}}
   224  	case UpgradeFinishing:
   225  		assertSane = bson.D{{"status", bson.D{{"$in",
   226  			[]UpgradeStatus{UpgradeRunning, UpgradeFinishing},
   227  		}}}}
   228  	default:
   229  		return errors.Errorf("unknown upgrade status: %s", status)
   230  	}
   231  	if info.doc.Id != currentUpgradeId {
   232  		return errors.New("cannot set status on non-current upgrade")
   233  	}
   234  
   235  	ops := []txn.Op{{
   236  		C:  upgradeInfoC,
   237  		Id: currentUpgradeId,
   238  		Assert: append(bson.D{{
   239  			"previousVersion", info.doc.PreviousVersion,
   240  		}, {
   241  			"targetVersion", info.doc.TargetVersion,
   242  		}}, assertSane...),
   243  		Update: bson.D{{"$set", bson.D{{"status", status}}}},
   244  	}}
   245  	err := info.st.runTransaction(ops)
   246  	if err == txn.ErrAborted {
   247  		return errors.Errorf("cannot set upgrade status to %q: Another "+
   248  			"status change may have occurred concurrently", status)
   249  	}
   250  	return errors.Annotate(err, "cannot set upgrade status")
   251  }
   252  
   253  // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the
   254  // supplied versions. If a matching upgrade is in progress, that upgrade is returned;
   255  // if there's a mismatch, an error is returned. The supplied machine id must correspond
   256  // to a current controller.
   257  func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   258  
   259  	assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion)
   260  	if err != nil {
   261  		return nil, errors.Trace(err)
   262  	}
   263  
   264  	doc := upgradeInfoDoc{
   265  		Id:               currentUpgradeId,
   266  		PreviousVersion:  previousVersion,
   267  		TargetVersion:    targetVersion,
   268  		Status:           UpgradePending,
   269  		Started:          st.clock.Now().UTC(),
   270  		ControllersReady: []string{machineId},
   271  	}
   272  
   273  	machine, err := st.Machine(machineId)
   274  	if err != nil {
   275  		return nil, errors.Trace(err)
   276  	}
   277  
   278  	ops := []txn.Op{{
   279  		C:      upgradeInfoC,
   280  		Id:     currentUpgradeId,
   281  		Assert: txn.DocMissing,
   282  		Insert: doc,
   283  	}, {
   284  		C:      instanceDataC,
   285  		Id:     machine.doc.DocID,
   286  		Assert: txn.DocExists,
   287  	}}
   288  	if err := st.runRawTransaction(ops); err == nil {
   289  		return &UpgradeInfo{st: st, doc: doc}, nil
   290  	} else if err != txn.ErrAborted {
   291  		return nil, errors.Annotate(err, "cannot create upgrade info")
   292  	}
   293  
   294  	if provisioned, err := st.isMachineProvisioned(machineId); err != nil {
   295  		return nil, errors.Trace(err)
   296  	} else if !provisioned {
   297  		return nil, errors.Errorf(
   298  			"machine %s is not provisioned and should not be participating in upgrades",
   299  			machineId)
   300  	}
   301  
   302  	if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil {
   303  		return info, nil
   304  	} else if errors.Cause(err) != errUpgradeInfoNotUpdated {
   305  		return nil, errors.Trace(err)
   306  	}
   307  
   308  	ops = []txn.Op{{
   309  		C:      upgradeInfoC,
   310  		Id:     currentUpgradeId,
   311  		Assert: assertSanity,
   312  		Update: bson.D{{
   313  			"$addToSet", bson.D{{"controllersReady", machineId}},
   314  		}},
   315  	}}
   316  	switch err := st.runTransaction(ops); err {
   317  	case nil:
   318  		return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion)
   319  	case txn.ErrAborted:
   320  		return nil, errors.New("upgrade info changed during update")
   321  	}
   322  	return nil, errors.Annotate(err, "cannot update upgrade info")
   323  }
   324  
   325  func (st *State) isMachineProvisioned(machineId string) (bool, error) {
   326  	instanceData, closer := st.getRawCollection(instanceDataC)
   327  	defer closer()
   328  
   329  	for _, id := range []string{st.docID(machineId), machineId} {
   330  		count, err := instanceData.FindId(id).Count()
   331  		if err != nil {
   332  			return false, errors.Annotate(err, "cannot read instance data")
   333  		}
   334  		if count > 0 {
   335  			return true, nil
   336  		}
   337  	}
   338  	return false, nil
   339  }
   340  
// errUpgradeInfoNotUpdated is returned (wrapped) by ensureUpgradeInfoUpdated
// when the current upgrade info document matches the expected versions but
// does not yet list the given machine as ready. Compare against it using
// errors.Cause.
var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated")
   342  
   343  func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   344  	var doc upgradeInfoDoc
   345  	if pdoc, err := currentUpgradeInfoDoc(st); err != nil {
   346  		return nil, errors.Trace(err)
   347  	} else {
   348  		doc = *pdoc
   349  	}
   350  
   351  	if doc.PreviousVersion != previousVersion {
   352  		return nil, errors.Errorf(
   353  			"current upgrade info mismatch: expected previous version %s, got %s",
   354  			previousVersion, doc.PreviousVersion)
   355  	}
   356  	if doc.TargetVersion != targetVersion {
   357  		return nil, errors.Errorf(
   358  			"current upgrade info mismatch: expected target version %s, got %s",
   359  			targetVersion, doc.TargetVersion)
   360  	}
   361  
   362  	controllersReady := set.NewStrings(doc.ControllersReady...)
   363  	if !controllersReady.Contains(machineId) {
   364  		return nil, errors.Trace(errUpgradeInfoNotUpdated)
   365  	}
   366  	return &UpgradeInfo{st: st, doc: doc}, nil
   367  }
   368  
   369  // SetControllerDone marks the supplied state machineId as having
   370  // completed its upgrades. When SetControllerDone is called by the
   371  // last provisioned controller, the current upgrade info document
   372  // will be archived with a status of UpgradeComplete.
   373  func (info *UpgradeInfo) SetControllerDone(machineId string) error {
   374  	assertSanity, err := checkUpgradeInfoSanity(info.st, machineId,
   375  		info.doc.PreviousVersion, info.doc.TargetVersion)
   376  	if err != nil {
   377  		return errors.Trace(err)
   378  	}
   379  
   380  	buildTxn := func(attempt int) ([]txn.Op, error) {
   381  		doc, err := currentUpgradeInfoDoc(info.st)
   382  		if errors.IsNotFound(err) {
   383  			return nil, jujutxn.ErrNoOperations
   384  		} else if err != nil {
   385  			return nil, errors.Trace(err)
   386  		}
   387  		switch doc.Status {
   388  		case UpgradePending, UpgradeRunning:
   389  			return nil, errors.New("upgrade has not yet run")
   390  		}
   391  
   392  		controllersDone := set.NewStrings(doc.ControllersDone...)
   393  		if controllersDone.Contains(machineId) {
   394  			return nil, jujutxn.ErrNoOperations
   395  		}
   396  		controllersDone.Add(machineId)
   397  
   398  		controllersReady := set.NewStrings(doc.ControllersReady...)
   399  		controllersNotDone := controllersReady.Difference(controllersDone)
   400  		if controllersNotDone.IsEmpty() {
   401  			// This is the last controller. Archive the current
   402  			// upgradeInfo document.
   403  			doc.ControllersDone = controllersDone.SortedValues()
   404  			return info.makeArchiveOps(doc, UpgradeComplete), nil
   405  		}
   406  
   407  		return []txn.Op{{
   408  			C:  upgradeInfoC,
   409  			Id: currentUpgradeId,
   410  			// This is not the last controller, but we need to be
   411  			// sure it still isn't when we run this.
   412  			Assert: append(assertSanity, bson.D{{
   413  				"controllersDone", bson.D{{"$nin", controllersNotDone.Values()}},
   414  			}}...),
   415  			Update: bson.D{{"$addToSet", bson.D{{"controllersDone", machineId}}}},
   416  		}}, nil
   417  	}
   418  	err = info.st.run(buildTxn)
   419  	return errors.Annotate(err, "cannot complete upgrade")
   420  }
   421  
   422  // Abort marks the current upgrade as aborted. It should be called if
   423  // the upgrade can't be completed for some reason.
   424  func (info *UpgradeInfo) Abort() error {
   425  	buildTxn := func(attempt int) ([]txn.Op, error) {
   426  		doc, err := currentUpgradeInfoDoc(info.st)
   427  		if errors.IsNotFound(err) {
   428  			return nil, jujutxn.ErrNoOperations
   429  		} else if err != nil {
   430  			return nil, errors.Trace(err)
   431  		}
   432  		return info.makeArchiveOps(doc, UpgradeAborted), nil
   433  	}
   434  	err := info.st.run(buildTxn)
   435  	return errors.Annotate(err, "cannot abort upgrade")
   436  }
   437  
   438  func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op {
   439  	doc.Status = status
   440  	doc.Id = bson.NewObjectId().String() // change id to archive value
   441  	return []txn.Op{{
   442  		C:      upgradeInfoC,
   443  		Id:     currentUpgradeId,
   444  		Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion),
   445  		Remove: true,
   446  	}, {
   447  		C:      upgradeInfoC,
   448  		Id:     doc.Id,
   449  		Assert: txn.DocMissing,
   450  		Insert: doc,
   451  	}}
   452  }
   453  
   454  // IsUpgrading returns true if an upgrade is currently in progress.
   455  func (st *State) IsUpgrading() (bool, error) {
   456  	doc, err := currentUpgradeInfoDoc(st)
   457  	if doc != nil && err == nil {
   458  		return true, nil
   459  	} else if errors.IsNotFound(err) {
   460  		return false, nil
   461  	} else {
   462  		return false, errors.Trace(err)
   463  	}
   464  }
   465  
   466  // AbortCurrentUpgrade archives any current UpgradeInfo and sets its
   467  // status to UpgradeAborted. Nothing happens if there's no current
   468  // UpgradeInfo.
   469  func (st *State) AbortCurrentUpgrade() error {
   470  	doc, err := currentUpgradeInfoDoc(st)
   471  	if err != nil {
   472  		if errors.IsNotFound(err) {
   473  			return nil
   474  		}
   475  		return errors.Trace(err)
   476  	}
   477  	info := &UpgradeInfo{st: st, doc: *doc}
   478  	return errors.Trace(info.Abort())
   479  
   480  }
   481  
   482  func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) {
   483  	var doc upgradeInfoDoc
   484  	upgradeInfo, closer := st.getCollection(upgradeInfoC)
   485  	defer closer()
   486  	if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound {
   487  		return nil, errors.NotFoundf("current upgrade info")
   488  	} else if err != nil {
   489  		return nil, errors.Annotate(err, "cannot read upgrade info")
   490  	}
   491  	return &doc, nil
   492  }
   493  
   494  func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) {
   495  	if previousVersion.Compare(targetVersion) != -1 {
   496  		return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion)
   497  	}
   498  	controllerInfo, err := st.ControllerInfo()
   499  	if err != nil {
   500  		return nil, errors.Annotate(err, "cannot read controllers")
   501  	}
   502  	validIds := set.NewStrings(controllerInfo.MachineIds...)
   503  	if !validIds.Contains(machineId) {
   504  		return nil, errors.Errorf("machine %q is not a controller", machineId)
   505  	}
   506  	return assertExpectedVersions(previousVersion, targetVersion), nil
   507  }
   508  
   509  func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D {
   510  	return bson.D{{
   511  		"previousVersion", previousVersion,
   512  	}, {
   513  		"targetVersion", targetVersion,
   514  	}}
   515  }
   516  
   517  // ClearUpgradeInfo clears information about an upgrade in progress. It returns
   518  // an error if no upgrade is current.
   519  func (st *State) ClearUpgradeInfo() error {
   520  	ops := []txn.Op{{
   521  		C:      upgradeInfoC,
   522  		Id:     currentUpgradeId,
   523  		Assert: txn.DocExists,
   524  		Remove: true,
   525  	}}
   526  	err := st.runTransaction(ops)
   527  	return errors.Annotate(err, "cannot clear upgrade info")
   528  }