github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/state/upgrade.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  /*
     5  This file defines infrastructure for synchronising state server tools
     6  upgrades. Synchronisation is handled via a mongo DB document in the
     7  "upgradeInfo" collection.
     8  
     9  The functionality here is intended to be used as follows:
    10  
    11  1. When state servers come up running the new tools version, they call
    12  EnsureUpgradeInfo before running upgrade steps.
    13  
    14  2a. Any secondary state server watches the UpgradeInfo document and
    15  waits for the status to change to UpgradeFinishing.
    16  
    17  2b. The master state server watches the UpgradeInfo document and waits
    18  for AllProvisionedStateServersReady to return true. This indicates
    19  that all provisioned state servers have called EnsureUpgradeInfo and
    20  are ready to upgrade.
    21  
    22  3. The master state server calls SetStatus with UpgradeRunning and
    23  runs its upgrade steps.
    24  
    25  4. The master state server calls SetStatus with UpgradeFinishing and
then calls SetStateServerDone with its own machine id.
    27  
    28  5. Secondary state servers, seeing that the status has changed to
    29  UpgradeFinishing, run their upgrade steps and then call
    30  SetStateServerDone when complete.
    31  
    32  6. Once the final state server calls SetStateServerDone, the status is
    33  changed to UpgradeComplete and the upgradeInfo document is archived.
    34  */
    35  
    36  package state
    37  
    38  import (
    39  	"time"
    40  
    41  	"github.com/juju/errors"
    42  	jujutxn "github.com/juju/txn"
    43  	"github.com/juju/utils/set"
    44  	"gopkg.in/mgo.v2"
    45  	"gopkg.in/mgo.v2/bson"
    46  	"gopkg.in/mgo.v2/txn"
    47  
    48  	"github.com/juju/juju/version"
    49  )
    50  
    51  // UpgradeStatus describes the states an upgrade operation may be in.
    52  type UpgradeStatus string
    53  
    54  const (
    55  	// UpgradePending indicates that an upgrade is queued but not yet started.
    56  	UpgradePending UpgradeStatus = "pending"
    57  
    58  	// UpgradeRunning indicates that the master state server has started
    59  	// running upgrade logic, and other state servers are waiting for it.
    60  	UpgradeRunning UpgradeStatus = "running"
    61  
    62  	// UpgradeFinishing indicates that the master state server has finished
    63  	// running upgrade logic, and other state servers are catching up.
    64  	UpgradeFinishing UpgradeStatus = "finishing"
    65  
    66  	// UpgradeComplete indicates that all state servers have finished running
    67  	// upgrade logic.
    68  	UpgradeComplete UpgradeStatus = "complete"
    69  
    70  	// UpgradeAborted indicates that the upgrade wasn't completed due
    71  	// to some problem.
    72  	UpgradeAborted UpgradeStatus = "aborted"
    73  
    74  	// currentUpgradeId is the mongo _id of the current upgrade info document.
    75  	currentUpgradeId = "current"
    76  )
    77  
// upgradeInfoDoc is the mongo representation of an upgrade, stored in
// the upgradeInfo collection.
type upgradeInfoDoc struct {
	// Id is the document's _id. The in-progress upgrade is stored under
	// currentUpgradeId; archived documents are given a fresh id.
	Id                string         `bson:"_id"`
	// PreviousVersion and TargetVersion are the versions being upgraded
	// from and to.
	PreviousVersion   version.Number `bson:"previousVersion"`
	TargetVersion     version.Number `bson:"targetVersion"`
	// Status is the phase the upgrade is currently recorded as being in.
	Status            UpgradeStatus  `bson:"status"`
	// Started is the (UTC) time at which the document was first created.
	Started           time.Time      `bson:"started"`
	// StateServersReady holds the machine ids registered through
	// EnsureUpgradeInfo.
	StateServersReady []string       `bson:"stateServersReady"`
	// StateServersDone holds the machine ids recorded through
	// SetStateServerDone.
	StateServersDone  []string       `bson:"stateServersDone"`
}
    87  
// UpgradeInfo is used to synchronise state server upgrades. It pairs a
// snapshot of the upgrade info document with the State it was read from;
// call Refresh to update the snapshot.
type UpgradeInfo struct {
	st  *State
	doc upgradeInfoDoc
}
    93  
// PreviousVersion returns the version being upgraded from, as held in
// this UpgradeInfo's copy of the upgrade document.
func (info *UpgradeInfo) PreviousVersion() version.Number {
	return info.doc.PreviousVersion
}
    98  
// TargetVersion returns the version being upgraded to, as held in
// this UpgradeInfo's copy of the upgrade document.
func (info *UpgradeInfo) TargetVersion() version.Number {
	return info.doc.TargetVersion
}
   103  
// Status returns the status of the upgrade as of the last time this
// UpgradeInfo was read or refreshed.
func (info *UpgradeInfo) Status() UpgradeStatus {
	return info.doc.Status
}
   108  
// Started returns the time at which the upgrade was started, as
// recorded in the upgrade document.
func (info *UpgradeInfo) Started() time.Time {
	return info.doc.Started
}
   113  
   114  // StateServersReady returns the machine ids for state servers that
   115  // have signalled that they are ready for upgrade.
   116  func (info *UpgradeInfo) StateServersReady() []string {
   117  	result := make([]string, len(info.doc.StateServersReady))
   118  	copy(result, info.doc.StateServersReady)
   119  	return result
   120  }
   121  
   122  // StateServersDone returns the machine ids for state servers that
   123  // have completed their upgrades.
   124  func (info *UpgradeInfo) StateServersDone() []string {
   125  	result := make([]string, len(info.doc.StateServersDone))
   126  	copy(result, info.doc.StateServersDone)
   127  	return result
   128  }
   129  
   130  // Refresh updates the contents of the UpgradeInfo from underlying state.
   131  func (info *UpgradeInfo) Refresh() error {
   132  	doc, err := currentUpgradeInfoDoc(info.st)
   133  	if err != nil {
   134  		return errors.Trace(err)
   135  	}
   136  	info.doc = *doc
   137  	return nil
   138  }
   139  
// Watch returns a watcher for the state underlying the current
// UpgradeInfo instance. It simply delegates to State.WatchUpgradeInfo
// and is provided purely for convenience.
func (info *UpgradeInfo) Watch() NotifyWatcher {
	return info.st.WatchUpgradeInfo()
}
   145  
   146  // AllProvisionedStateServersReady returns true if and only if all state servers
   147  // that have been started by the provisioner have called EnsureUpgradeInfo with
   148  // matching versions.
   149  //
   150  // When this returns true the master state state server can begin it's
   151  // own upgrade.
   152  func (info *UpgradeInfo) AllProvisionedStateServersReady() (bool, error) {
   153  	provisioned, err := info.getProvisionedStateServers()
   154  	if err != nil {
   155  		return false, errors.Trace(err)
   156  	}
   157  	ready := set.NewStrings(info.doc.StateServersReady...)
   158  	missing := set.NewStrings(provisioned...).Difference(ready)
   159  	return missing.IsEmpty(), nil
   160  }
   161  
   162  func (info *UpgradeInfo) getProvisionedStateServers() ([]string, error) {
   163  	var provisioned []string
   164  
   165  	stateServerInfo, err := info.st.StateServerInfo()
   166  	if err != nil {
   167  		return provisioned, errors.Annotate(err, "cannot read state servers")
   168  	}
   169  
   170  	upgradeDone, err := info.isEnvUUIDUpgradeDone()
   171  	if err != nil {
   172  		return provisioned, errors.Trace(err)
   173  	}
   174  
   175  	// Extract current and provisioned state servers.
   176  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   177  	defer closer()
   178  
   179  	// If instanceData has the env UUID upgrade query using the
   180  	// machineid field, otherwise check using _id.
   181  	var sel bson.D
   182  	var field string
   183  	if upgradeDone {
   184  		sel = bson.D{{"env-uuid", info.st.EnvironUUID()}}
   185  		field = "machineid"
   186  	} else {
   187  		field = "_id"
   188  	}
   189  	sel = append(sel, bson.DocElem{field, bson.D{{"$in", stateServerInfo.MachineIds}}})
   190  	iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter()
   191  
   192  	var doc bson.M
   193  	for iter.Next(&doc) {
   194  		provisioned = append(provisioned, doc[field].(string))
   195  	}
   196  	if err := iter.Close(); err != nil {
   197  		return provisioned, errors.Annotate(err, "cannot read provisioned machines")
   198  	}
   199  	return provisioned, nil
   200  }
   201  
   202  func (info *UpgradeInfo) isEnvUUIDUpgradeDone() (bool, error) {
   203  	instanceData, closer := info.st.getRawCollection(instanceDataC)
   204  	defer closer()
   205  
   206  	query := instanceData.Find(bson.D{{"env-uuid", bson.D{{"$exists", true}}}})
   207  	n, err := query.Count()
   208  	if err != nil {
   209  		return false, errors.Annotatef(err, "couldn't query instance upgrade status")
   210  	}
   211  	return n > 0, nil
   212  }
   213  
   214  // SetStatus sets the status of the current upgrade. Checks are made
   215  // to ensure that status changes are performed in the correct order.
   216  func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error {
   217  	var assertSane bson.D
   218  	switch status {
   219  	case UpgradePending, UpgradeComplete, UpgradeAborted:
   220  		return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status)
   221  	case UpgradeRunning:
   222  		assertSane = bson.D{{"status", bson.D{{"$in",
   223  			[]UpgradeStatus{UpgradePending, UpgradeRunning},
   224  		}}}}
   225  	case UpgradeFinishing:
   226  		assertSane = bson.D{{"status", bson.D{{"$in",
   227  			[]UpgradeStatus{UpgradeRunning, UpgradeFinishing},
   228  		}}}}
   229  	default:
   230  		return errors.Errorf("unknown upgrade status: %s", status)
   231  	}
   232  	if info.doc.Id != currentUpgradeId {
   233  		return errors.New("cannot set status on non-current upgrade")
   234  	}
   235  
   236  	ops := []txn.Op{{
   237  		C:  upgradeInfoC,
   238  		Id: currentUpgradeId,
   239  		Assert: append(bson.D{{
   240  			"previousVersion", info.doc.PreviousVersion,
   241  		}, {
   242  			"targetVersion", info.doc.TargetVersion,
   243  		}}, assertSane...),
   244  		Update: bson.D{{"$set", bson.D{{"status", status}}}},
   245  	}}
   246  	err := info.st.runTransaction(ops)
   247  	if err == txn.ErrAborted {
   248  		return errors.Errorf("cannot set upgrade status to %q: Another "+
   249  			"status change may have occurred concurrently", status)
   250  	}
   251  	return errors.Annotate(err, "cannot set upgrade status")
   252  }
   253  
   254  // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the
   255  // supplied versions. If a matching upgrade is in progress, that upgrade is returned;
   256  // if there's a mismatch, an error is returned. The supplied machine id must correspond
   257  // to a current state server.
   258  func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   259  
   260  	assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion)
   261  	if err != nil {
   262  		return nil, errors.Trace(err)
   263  	}
   264  
   265  	doc := upgradeInfoDoc{
   266  		Id:                currentUpgradeId,
   267  		PreviousVersion:   previousVersion,
   268  		TargetVersion:     targetVersion,
   269  		Status:            UpgradePending,
   270  		Started:           time.Now().UTC(),
   271  		StateServersReady: []string{machineId},
   272  	}
   273  
   274  	machine, err := st.Machine(machineId)
   275  	if err != nil {
   276  		return nil, errors.Trace(err)
   277  	}
   278  
   279  	ops := []txn.Op{{
   280  		C:      upgradeInfoC,
   281  		Id:     currentUpgradeId,
   282  		Assert: txn.DocMissing,
   283  		Insert: doc,
   284  	}, {
   285  		C:      instanceDataC,
   286  		Id:     machine.doc.DocID,
   287  		Assert: txn.DocExists,
   288  	}}
   289  	if err := st.runRawTransaction(ops); err == nil {
   290  		return &UpgradeInfo{st: st, doc: doc}, nil
   291  	} else if err != txn.ErrAborted {
   292  		return nil, errors.Annotate(err, "cannot create upgrade info")
   293  	}
   294  
   295  	if provisioned, err := st.isMachineProvisioned(machineId); err != nil {
   296  		return nil, errors.Trace(err)
   297  	} else if !provisioned {
   298  		return nil, errors.Errorf(
   299  			"machine %s is not provisioned and should not be participating in upgrades",
   300  			machineId)
   301  	}
   302  
   303  	if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil {
   304  		return info, nil
   305  	} else if errors.Cause(err) != errUpgradeInfoNotUpdated {
   306  		return nil, errors.Trace(err)
   307  	}
   308  
   309  	ops = []txn.Op{{
   310  		C:      upgradeInfoC,
   311  		Id:     currentUpgradeId,
   312  		Assert: assertSanity,
   313  		Update: bson.D{{
   314  			"$addToSet", bson.D{{"stateServersReady", machineId}},
   315  		}},
   316  	}}
   317  	switch err := st.runTransaction(ops); err {
   318  	case nil:
   319  		return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion)
   320  	case txn.ErrAborted:
   321  		return nil, errors.New("upgrade info changed during update")
   322  	}
   323  	return nil, errors.Annotate(err, "cannot update upgrade info")
   324  }
   325  
   326  func (st *State) isMachineProvisioned(machineId string) (bool, error) {
   327  	instanceData, closer := st.getRawCollection(instanceDataC)
   328  	defer closer()
   329  
   330  	for _, id := range []string{st.docID(machineId), machineId} {
   331  		count, err := instanceData.FindId(id).Count()
   332  		if err != nil {
   333  			return false, errors.Annotate(err, "cannot read instance data")
   334  		}
   335  		if count > 0 {
   336  			return true, nil
   337  		}
   338  	}
   339  	return false, nil
   340  }
   341  
// errUpgradeInfoNotUpdated is returned (traced) by ensureUpgradeInfoUpdated
// when the current upgrade info document does not yet list the machine
// as ready. Compare against it using errors.Cause.
var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated")
   343  
   344  func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) {
   345  	var doc upgradeInfoDoc
   346  	if pdoc, err := currentUpgradeInfoDoc(st); err != nil {
   347  		return nil, errors.Trace(err)
   348  	} else {
   349  		doc = *pdoc
   350  	}
   351  
   352  	if doc.PreviousVersion != previousVersion {
   353  		return nil, errors.Errorf(
   354  			"current upgrade info mismatch: expected previous version %s, got %s",
   355  			previousVersion, doc.PreviousVersion)
   356  	}
   357  	if doc.TargetVersion != targetVersion {
   358  		return nil, errors.Errorf(
   359  			"current upgrade info mismatch: expected target version %s, got %s",
   360  			targetVersion, doc.TargetVersion)
   361  	}
   362  
   363  	stateServersReady := set.NewStrings(doc.StateServersReady...)
   364  	if !stateServersReady.Contains(machineId) {
   365  		return nil, errors.Trace(errUpgradeInfoNotUpdated)
   366  	}
   367  	return &UpgradeInfo{st: st, doc: doc}, nil
   368  }
   369  
   370  // SetStateServerDone marks the supplied state machineId as having
   371  // completed its upgrades. When SetStateServerDone is called by the
   372  // last provisioned state server, the current upgrade info document
   373  // will be archived with a status of UpgradeComplete.
   374  func (info *UpgradeInfo) SetStateServerDone(machineId string) error {
   375  	assertSanity, err := checkUpgradeInfoSanity(info.st, machineId,
   376  		info.doc.PreviousVersion, info.doc.TargetVersion)
   377  	if err != nil {
   378  		return errors.Trace(err)
   379  	}
   380  
   381  	buildTxn := func(attempt int) ([]txn.Op, error) {
   382  		doc, err := currentUpgradeInfoDoc(info.st)
   383  		if errors.IsNotFound(err) {
   384  			return nil, jujutxn.ErrNoOperations
   385  		} else if err != nil {
   386  			return nil, errors.Trace(err)
   387  		}
   388  		switch doc.Status {
   389  		case UpgradePending, UpgradeRunning:
   390  			return nil, errors.New("upgrade has not yet run")
   391  		}
   392  
   393  		stateServersDone := set.NewStrings(doc.StateServersDone...)
   394  		if stateServersDone.Contains(machineId) {
   395  			return nil, jujutxn.ErrNoOperations
   396  		}
   397  		stateServersDone.Add(machineId)
   398  
   399  		stateServersReady := set.NewStrings(doc.StateServersReady...)
   400  		stateServersNotDone := stateServersReady.Difference(stateServersDone)
   401  		if stateServersNotDone.IsEmpty() {
   402  			// This is the last state server. Archive the current
   403  			// upgradeInfo document.
   404  			doc.StateServersDone = stateServersDone.SortedValues()
   405  			return info.makeArchiveOps(doc, UpgradeComplete), nil
   406  		}
   407  
   408  		return []txn.Op{{
   409  			C:  upgradeInfoC,
   410  			Id: currentUpgradeId,
   411  			// This is not the last state server, but we need to be
   412  			// sure it still isn't when we run this.
   413  			Assert: append(assertSanity, bson.D{{
   414  				"stateServersDone", bson.D{{"$nin", stateServersNotDone.Values()}},
   415  			}}...),
   416  			Update: bson.D{{"$addToSet", bson.D{{"stateServersDone", machineId}}}},
   417  		}}, nil
   418  	}
   419  	err = info.st.run(buildTxn)
   420  	return errors.Annotate(err, "cannot complete upgrade")
   421  }
   422  
   423  // Abort marks the current upgrade as aborted. It should be called if
   424  // the upgrade can't be completed for some reason.
   425  func (info *UpgradeInfo) Abort() error {
   426  	buildTxn := func(attempt int) ([]txn.Op, error) {
   427  		doc, err := currentUpgradeInfoDoc(info.st)
   428  		if errors.IsNotFound(err) {
   429  			return nil, jujutxn.ErrNoOperations
   430  		} else if err != nil {
   431  			return nil, errors.Trace(err)
   432  		}
   433  		return info.makeArchiveOps(doc, UpgradeAborted), nil
   434  	}
   435  	err := info.st.run(buildTxn)
   436  	return errors.Annotate(err, "cannot abort upgrade")
   437  }
   438  
   439  func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op {
   440  	doc.Status = status
   441  	doc.Id = bson.NewObjectId().String() // change id to archive value
   442  	return []txn.Op{{
   443  		C:      upgradeInfoC,
   444  		Id:     currentUpgradeId,
   445  		Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion),
   446  		Remove: true,
   447  	}, {
   448  		C:      upgradeInfoC,
   449  		Id:     doc.Id,
   450  		Assert: txn.DocMissing,
   451  		Insert: doc,
   452  	}}
   453  }
   454  
   455  // IsUpgrading returns true if an upgrade is currently in progress.
   456  func (st *State) IsUpgrading() (bool, error) {
   457  	doc, err := currentUpgradeInfoDoc(st)
   458  	if doc != nil && err == nil {
   459  		return true, nil
   460  	} else if errors.IsNotFound(err) {
   461  		return false, nil
   462  	} else {
   463  		return false, errors.Trace(err)
   464  	}
   465  }
   466  
   467  // AbortCurrentUpgrade archives any current UpgradeInfo and sets its
   468  // status to UpgradeAborted. Nothing happens if there's no current
   469  // UpgradeInfo.
   470  func (st *State) AbortCurrentUpgrade() error {
   471  	doc, err := currentUpgradeInfoDoc(st)
   472  	if err != nil {
   473  		if errors.IsNotFound(err) {
   474  			return nil
   475  		}
   476  		return errors.Trace(err)
   477  	}
   478  	info := &UpgradeInfo{st: st, doc: *doc}
   479  	return errors.Trace(info.Abort())
   480  
   481  }
   482  
   483  func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) {
   484  	var doc upgradeInfoDoc
   485  	upgradeInfo, closer := st.getCollection(upgradeInfoC)
   486  	defer closer()
   487  	if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound {
   488  		return nil, errors.NotFoundf("current upgrade info")
   489  	} else if err != nil {
   490  		return nil, errors.Annotate(err, "cannot read upgrade info")
   491  	}
   492  	return &doc, nil
   493  }
   494  
   495  func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) {
   496  	if previousVersion.Compare(targetVersion) != -1 {
   497  		return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion)
   498  	}
   499  	stateServerInfo, err := st.StateServerInfo()
   500  	if err != nil {
   501  		return nil, errors.Annotate(err, "cannot read state servers")
   502  	}
   503  	validIds := set.NewStrings(stateServerInfo.MachineIds...)
   504  	if !validIds.Contains(machineId) {
   505  		return nil, errors.Errorf("machine %q is not a state server", machineId)
   506  	}
   507  	return assertExpectedVersions(previousVersion, targetVersion), nil
   508  }
   509  
   510  func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D {
   511  	return bson.D{{
   512  		"previousVersion", previousVersion,
   513  	}, {
   514  		"targetVersion", targetVersion,
   515  	}}
   516  }
   517  
   518  // ClearUpgradeInfo clears information about an upgrade in progress. It returns
   519  // an error if no upgrade is current.
   520  func (st *State) ClearUpgradeInfo() error {
   521  	ops := []txn.Op{{
   522  		C:      upgradeInfoC,
   523  		Id:     currentUpgradeId,
   524  		Assert: txn.DocExists,
   525  		Remove: true,
   526  	}}
   527  	err := st.runTransaction(ops)
   528  	return errors.Annotate(err, "cannot clear upgrade info")
   529  }