github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/status.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"fmt"
     8  	"reflect"
     9  	"time"
    10  
    11  	"github.com/juju/clock"
    12  	"github.com/juju/errors"
    13  	"github.com/juju/loggo"
    14  	"github.com/juju/mgo/v3"
    15  	"github.com/juju/mgo/v3/bson"
    16  	"github.com/juju/mgo/v3/txn"
    17  	jujutxn "github.com/juju/txn/v3"
    18  
    19  	"github.com/juju/juju/core/leadership"
    20  	"github.com/juju/juju/core/status"
    21  	"github.com/juju/juju/mongo"
    22  	"github.com/juju/juju/mongo/utils"
    23  )
    24  
// displayStatusFunc is the signature of a function that combines a unit's
// status with its container status, given whether a workload is expected,
// into a single status. caasHistoryRewriteDoc calls a function of this type
// to decide which status is recorded as history.
type displayStatusFunc func(unitStatus status.StatusInfo, containerStatus status.StatusInfo, expectWorkload bool) status.StatusInfo
    26  
// ModelStatus holds all the current status values for a given model
// and offers accessors for the various parts of a model.
//
// The status documents are kept in a map keyed by their local ID (see
// LoadModelStatus), so the accessors are plain map lookups.
type ModelStatus struct {
	model *Model
	docs  map[string]statusDocWithID
}
    33  
    34  // LoadModelStatus retrieves all the status documents for the model
    35  // at once. Used to primarily speed up status.
    36  func (m *Model) LoadModelStatus() (*ModelStatus, error) {
    37  	statuses, closer := m.st.db().GetCollection(statusesC)
    38  	defer closer()
    39  
    40  	var docs []statusDocWithID
    41  	err := statuses.Find(nil).All(&docs)
    42  	if err != nil {
    43  		return nil, errors.Annotate(err, "failed to read status collection")
    44  	}
    45  
    46  	result := &ModelStatus{
    47  		model: m,
    48  		docs:  make(map[string]statusDocWithID),
    49  	}
    50  	for _, doc := range docs {
    51  		id := m.localID(doc.ID)
    52  		result.docs[id] = doc
    53  	}
    54  
    55  	return result, nil
    56  }
    57  
    58  func (m *ModelStatus) getDoc(key, badge string) (statusDocWithID, error) {
    59  	doc, found := m.docs[key]
    60  	if !found {
    61  		return statusDocWithID{}, errors.Annotate(errors.NotFoundf(badge), "cannot get status")
    62  	}
    63  	return doc, nil
    64  }
    65  
    66  func (m *ModelStatus) getStatus(key, badge string) (status.StatusInfo, error) {
    67  	doc, err := m.getDoc(key, badge)
    68  	if err != nil {
    69  		return status.StatusInfo{}, err
    70  	}
    71  	return doc.asStatusInfo(), nil
    72  }
    73  
    74  // Model returns the status of the model.
    75  func (m *ModelStatus) Model() (status.StatusInfo, error) {
    76  	return m.getStatus(m.model.globalKey(), "model")
    77  }
    78  
    79  // MachineAgent returns the status of the machine agent.
    80  func (m *ModelStatus) MachineAgent(machineID string) (status.StatusInfo, error) {
    81  	return m.getStatus(machineGlobalKey(machineID), "machine")
    82  }
    83  
    84  // MachineInstance returns the status of the machine instance.
    85  func (m *ModelStatus) MachineInstance(machineID string) (status.StatusInfo, error) {
    86  	return m.getStatus(machineGlobalInstanceKey(machineID), "instance")
    87  }
    88  
    89  // MachineModification returns the status of the machine modification
    90  func (m *ModelStatus) MachineModification(machineID string) (status.StatusInfo, error) {
    91  	return m.getStatus(machineGlobalModificationKey(machineID), "modification")
    92  }
    93  
    94  // FullUnitWorkloadVersion returns the full status info for the workload
    95  // version of a unit. This is used for selecting the workload version for
    96  // an application.
    97  func (m *ModelStatus) FullUnitWorkloadVersion(unitName string) (status.StatusInfo, error) {
    98  	return m.getStatus(globalWorkloadVersionKey(unitName), "workload")
    99  }
   100  
   101  // UnitWorkloadVersion returns workload version for the unit
   102  func (m *ModelStatus) UnitWorkloadVersion(unitName string) (string, error) {
   103  	info, err := m.getStatus(globalWorkloadVersionKey(unitName), "workload")
   104  	if err != nil {
   105  		return "", err
   106  	}
   107  	return info.Message, nil
   108  }
   109  
   110  // UnitAgent returns the status of the Unit's agent.
   111  func (m *ModelStatus) UnitAgent(unitName string) (status.StatusInfo, error) {
   112  	// We do horrible things with unit status.
   113  	// See notes in unitagent.go.
   114  	info, err := m.getStatus(unitAgentGlobalKey(unitName), "agent")
   115  	if err != nil {
   116  		return info, err
   117  	}
   118  	if info.Status == status.Error {
   119  		return status.StatusInfo{
   120  			Status:  status.Idle,
   121  			Message: "",
   122  			Data:    map[string]interface{}{},
   123  			Since:   info.Since,
   124  		}, nil
   125  	}
   126  	return info, nil
   127  }
   128  
   129  // UnitWorkload returns the status of the unit's workload.
   130  func (m *ModelStatus) UnitWorkload(unitName string, expectWorkload bool) (status.StatusInfo, error) {
   131  	// We do horrible things with unit status.
   132  	// See notes in unit.go.
   133  	info, err := m.getStatus(unitAgentGlobalKey(unitName), "unit")
   134  	if err != nil {
   135  		return info, err
   136  	} else if info.Status == status.Error {
   137  		return info, nil
   138  	}
   139  
   140  	// (for CAAS models) Use cloud container status over unit if the cloud
   141  	// container status is error or active or the unit status hasn't shifted
   142  	// from 'allocating'
   143  	info, err = m.getStatus(unitGlobalKey(unitName), "workload")
   144  	if err != nil {
   145  		return info, errors.Trace(err)
   146  	}
   147  
   148  	if m.model.Type() == ModelTypeIAAS {
   149  		return info, nil
   150  	}
   151  
   152  	containerInfo, err := m.getStatus(globalCloudContainerKey(unitName), "cloud container")
   153  	if err != nil && !errors.IsNotFound(err) {
   154  		return info, err
   155  	}
   156  	return status.UnitDisplayStatus(info, containerInfo, expectWorkload), nil
   157  }
   158  
   159  // caasHistoryRewriteDoc determines which status should be stored as history.
   160  func caasHistoryRewriteDoc(jujuStatus, caasStatus status.StatusInfo, expectWorkload bool, displayStatus displayStatusFunc, clock clock.Clock) (*statusDoc, error) {
   161  	modifiedStatus := displayStatus(jujuStatus, caasStatus, expectWorkload)
   162  	if modifiedStatus.Status == jujuStatus.Status && modifiedStatus.Message == jujuStatus.Message {
   163  		return nil, nil
   164  	}
   165  	return &statusDoc{
   166  		Status:     modifiedStatus.Status,
   167  		StatusInfo: modifiedStatus.Message,
   168  		StatusData: utils.EscapeKeys(modifiedStatus.Data),
   169  		Updated:    timeOrNow(modifiedStatus.Since, clock).UnixNano(),
   170  	}, nil
   171  }
   172  
// statusDocWithID has the same fields as statusDoc plus the document _id.
// It is the type LoadModelStatus decodes documents into, where the _id is
// needed to index the documents.
//
// Updated is converted with unixNanoToTime, i.e. it is treated as
// nanoseconds since the Unix epoch.
type statusDocWithID struct {
	ID         string                 `bson:"_id"`
	ModelUUID  string                 `bson:"model-uuid"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`
	Updated    int64                  `bson:"updated"`
}
   181  
   182  func (doc *statusDocWithID) asStatusInfo() status.StatusInfo {
   183  	return status.StatusInfo{
   184  		Status:  doc.Status,
   185  		Message: doc.StatusInfo,
   186  		Data:    utils.UnescapeKeys(doc.StatusData),
   187  		Since:   unixNanoToTime(doc.Updated),
   188  	}
   189  }
   190  
// statusDoc represents an entity status in Mongodb.  The implicit
// _id field is explicitly set to the global key of the associated
// entity in the document's creation transaction, but omitted to allow
// direct use of the document in both create and update transactions.
//
// StatusData is stored with escaped keys: writers in this file pass it
// through utils.EscapeKeys and asStatusInfo reverses that with
// utils.UnescapeKeys.
type statusDoc struct {
	ModelUUID  string                 `bson:"model-uuid"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`

	// Updated used to be a *time.Time that was not present on statuses dating
	// from older versions of juju so this might be 0 for those cases.
	Updated int64 `bson:"updated"`
}
   205  
   206  func (doc *statusDoc) asStatusInfo() status.StatusInfo {
   207  	return status.StatusInfo{
   208  		Status:  doc.Status,
   209  		Message: doc.StatusInfo,
   210  		Data:    utils.UnescapeKeys(doc.StatusData),
   211  		Since:   unixNanoToTime(doc.Updated),
   212  	}
   213  }
   214  
   215  func unixNanoToTime(i int64) *time.Time {
   216  	t := time.Unix(0, i)
   217  	return &t
   218  }
   219  
   220  // getStatus retrieves the status document associated with the given
   221  // globalKey and converts it to a StatusInfo. If the status document
   222  // is not found, a NotFoundError referencing badge will be returned.
   223  func getStatus(db Database, globalKey, badge string) (_ status.StatusInfo, err error) {
   224  	defer errors.DeferredAnnotatef(&err, "cannot get status")
   225  	statuses, closer := db.GetCollection(statusesC)
   226  	defer closer()
   227  
   228  	var doc statusDoc
   229  	err = statuses.FindId(globalKey).One(&doc)
   230  	if err == mgo.ErrNotFound {
   231  		return status.StatusInfo{}, errors.NotFoundf(badge)
   232  	} else if err != nil {
   233  		return status.StatusInfo{}, errors.Trace(err)
   234  	}
   235  
   236  	return doc.asStatusInfo(), nil
   237  }
   238  
   239  func getEntityKeysForStatus(mb modelBackend, keyType string, status status.Status) ([]string, error) {
   240  	statuses, closer := mb.db().GetCollection(statusesC)
   241  	defer closer()
   242  
   243  	var ids []bson.M
   244  	query := bson.D{
   245  		{"_id", bson.D{{"$regex", fmt.Sprintf(".+\\:%s#.+", keyType)}}},
   246  		{"status", status},
   247  	}
   248  	err := statuses.Find(query).Select(bson.D{{"_id", 1}}).All(&ids)
   249  	if err != nil {
   250  		return nil, errors.Trace(err)
   251  	}
   252  
   253  	keys := make([]string, len(ids))
   254  	for i, id := range ids {
   255  		keys[i] = mb.localID(id["_id"].(string))
   256  	}
   257  	return keys, nil
   258  }
   259  
// setStatusParams configures a setStatus call. All parameters are presumed to
// be set to valid values unless otherwise noted.
type setStatusParams struct {

	// badge is used to specialize any NotFound error emitted.
	badge string

	// globalKey uniquely identifies the entity whose status is being set.
	globalKey string

	// status is the status value.
	status status.Status

	// message is an optional string elaborating upon the status.
	message string

	// rawData is a map of arbitrary data elaborating upon the status and
	// message. Its keys are assumed not to have been escaped.
	rawData map[string]interface{}

	// token, if present, must accept an *[]txn.Op passed to its Check method,
	// and will prevent any change if it becomes invalid.
	token leadership.Token

	// updated is the time the status was set. It must not be nil: setStatus
	// rejects a nil value with a NotValid error.
	updated *time.Time

	// historyOverwrite provides an optional ability to write a different
	// version of status as history (vs. what status actually gets set.)
	// Used only with caas models as there is currently no way for a charm
	// to query its workload and the cloud container status might contradict
	// what it thinks it is.
	historyOverwrite *statusDoc
}
   294  
   295  func timeOrNow(t *time.Time, clock clock.Clock) *time.Time {
   296  	if t == nil {
   297  		now := clock.Now()
   298  		t = &now
   299  	}
   300  	return t
   301  }
   302  
   303  // setStatus inteprets the supplied params as documented on the type.
   304  func setStatus(db Database, params setStatusParams) (err error) {
   305  	defer errors.DeferredAnnotatef(&err, "cannot set status")
   306  	if params.updated == nil {
   307  		return errors.NotValidf("nil updated time")
   308  	}
   309  
   310  	doc := statusDoc{
   311  		Status:     params.status,
   312  		StatusInfo: params.message,
   313  		StatusData: utils.EscapeKeys(params.rawData),
   314  		Updated:    params.updated.UnixNano(),
   315  	}
   316  
   317  	historyDoc := &doc
   318  	if params.historyOverwrite != nil {
   319  		historyDoc = params.historyOverwrite
   320  	}
   321  
   322  	newStatus, historyErr := probablyUpdateStatusHistory(db, params.globalKey, *historyDoc)
   323  	if params.historyOverwrite == nil && (!newStatus && historyErr == nil) {
   324  		// If this status is not new (i.e. it is exactly the same as
   325  		// our last status), there is no need to update the record.
   326  		// Update here will only reset the 'Since' field.
   327  		return nil
   328  	}
   329  
   330  	// Set the authoritative status document, or fail trying.
   331  	var buildTxn jujutxn.TransactionSource = func(int) ([]txn.Op, error) {
   332  		return statusSetOps(db, doc, params.globalKey)
   333  	}
   334  	if params.token != nil {
   335  		buildTxn = buildTxnWithLeadership(buildTxn, params.token)
   336  	}
   337  	err = db.Run(buildTxn)
   338  	if cause := errors.Cause(err); cause == mgo.ErrNotFound {
   339  		return errors.NotFoundf(params.badge)
   340  	}
   341  	return errors.Trace(err)
   342  }
   343  
   344  func statusSetOps(db Database, doc statusDoc, globalKey string) ([]txn.Op, error) {
   345  	update := bson.D{{"$set", &doc}}
   346  	txnRevno, err := readTxnRevno(db, statusesC, globalKey)
   347  	if err != nil {
   348  		return nil, errors.Trace(err)
   349  	}
   350  	assert := bson.D{{"txn-revno", txnRevno}}
   351  	return []txn.Op{{
   352  		C:      statusesC,
   353  		Id:     globalKey,
   354  		Assert: assert,
   355  		Update: update,
   356  	}}, nil
   357  }
   358  
   359  // createStatusOp returns the operation needed to create the given status
   360  // document associated with the given globalKey.
   361  func createStatusOp(mb modelBackend, globalKey string, doc statusDoc) txn.Op {
   362  	return txn.Op{
   363  		C:      statusesC,
   364  		Id:     mb.docID(globalKey),
   365  		Assert: txn.DocMissing,
   366  		Insert: &doc,
   367  	}
   368  }
   369  
   370  // removeStatusOp returns the operation needed to remove the status
   371  // document associated with the given globalKey.
   372  func removeStatusOp(mb modelBackend, globalKey string) txn.Op {
   373  	return txn.Op{
   374  		C:      statusesC,
   375  		Id:     mb.docID(globalKey),
   376  		Remove: true,
   377  	}
   378  }
   379  
// globalKeyField is the name of the field by which status history
// documents are selected for an entity. It must have the same value as
// the bson tag for historicalStatusDoc.GlobalKey.
const globalKeyField = "globalkey"
   383  
// historicalStatusDoc is a single entry in the status history collection.
// It has the same fields as statusDoc plus GlobalKey, which records the
// entity the entry belongs to.
type historicalStatusDoc struct {
	ModelUUID  string                 `bson:"model-uuid"`
	GlobalKey  string                 `bson:"globalkey"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`

	// Updated might not be present on statuses copied by old
	// versions of juju from yet older versions of juju.
	Updated int64 `bson:"updated"`
}
   395  
// recordedHistoricalStatusDoc is the view of a stored status history entry
// that statusHistoryExists decodes: the entry's _id plus the fields that
// are compared against a new status (status, message and data).
type recordedHistoricalStatusDoc struct {
	ID         bson.ObjectId          `bson:"_id"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`
}
   402  
   403  // probablyUpdateStatusHistory inspects existing status-history
   404  // and determines if this status is new or the same as the last recorded.
   405  // If this is a new status, a new status history record will be added.
   406  // If this status is the same as the last status we've received,
   407  // we update that record to have a new timestamp.
   408  // Status messages are considered to be the same if they only differ in their timestamps.
   409  // The call returns true if a new status history record has been created.
   410  func probablyUpdateStatusHistory(db Database, globalKey string, doc statusDoc) (bool, error) {
   411  	historyDoc := &historicalStatusDoc{
   412  		Status:     doc.Status,
   413  		StatusInfo: doc.StatusInfo,
   414  		StatusData: doc.StatusData, // coming from a statusDoc, already escaped
   415  		Updated:    doc.Updated,
   416  		GlobalKey:  globalKey,
   417  	}
   418  	history, closer := db.GetCollection(statusesHistoryC)
   419  	defer closer()
   420  
   421  	exists, currentID := statusHistoryExists(db, historyDoc)
   422  	if exists {
   423  		// If the status values have not changed since the last run,
   424  		// update history record with this timestamp
   425  		// to keep correct track of when SetStatus ran.
   426  		historyW := history.Writeable()
   427  		err := historyW.Update(
   428  			bson.D{{"_id", currentID}},
   429  			bson.D{{"$set", bson.D{{"updated", doc.Updated}}}})
   430  		if err != nil {
   431  			logger.Errorf("failed to update status history: %v", err)
   432  			return false, err
   433  		}
   434  		return false, nil
   435  	}
   436  
   437  	historyW := history.Writeable()
   438  	err := historyW.Insert(historyDoc)
   439  	if err != nil {
   440  		logger.Errorf("failed to write status history: %v", err)
   441  		return false, err
   442  	}
   443  	return true, nil
   444  }
   445  
   446  func statusHistoryExists(db Database, historyDoc *historicalStatusDoc) (bool, bson.ObjectId) {
   447  	// Find the current value to see if it is worthwhile adding the new
   448  	// status value.
   449  	history, closer := db.GetCollection(statusesHistoryC)
   450  	defer closer()
   451  
   452  	var latest []recordedHistoricalStatusDoc
   453  	query := history.Find(bson.D{{globalKeyField, historyDoc.GlobalKey}})
   454  	query = query.Sort("-updated").Limit(1)
   455  	err := query.All(&latest)
   456  	if err == nil && len(latest) == 1 {
   457  		current := latest[0]
   458  		// Short circuit the writing to the DB if the status, message,
   459  		// and data match.
   460  		dataSame := func(left, right map[string]interface{}) bool {
   461  			// If they are both empty, then it is the same.
   462  			if len(left) == 0 && len(right) == 0 {
   463  				return true
   464  			}
   465  			// If either are now empty, they aren't the same.
   466  			if len(left) == 0 || len(right) == 0 {
   467  				return false
   468  			}
   469  			// Failing that, use reflect.
   470  			return reflect.DeepEqual(left, right)
   471  		}
   472  		// Check the data last as the short circuit evaluation may mean
   473  		// we rarely need to drop down into the reflect library.
   474  		if current.Status == historyDoc.Status &&
   475  			current.StatusInfo == historyDoc.StatusInfo &&
   476  			dataSame(current.StatusData, historyDoc.StatusData) {
   477  			return true, current.ID
   478  		}
   479  	}
   480  	return false, ""
   481  }
   482  
   483  // eraseStatusHistory removes all status history documents for
   484  // the given global key. The documents are removed in batches
   485  // to avoid locking the status history collection for extended
   486  // periods of time, preventing status history being recorded
   487  // for other entities.
   488  func eraseStatusHistory(stop <-chan struct{}, mb modelBackend, globalKey string) error {
   489  	// TODO(axw) restructure status history so we have one
   490  	// document per global key, and sub-documents per status
   491  	// recording. This method would then become a single
   492  	// Remove operation.
   493  
   494  	history, closer := mb.db().GetCollection(statusesHistoryC)
   495  	defer closer()
   496  
   497  	iter := history.Find(bson.D{{
   498  		globalKeyField, globalKey,
   499  	}}).Select(bson.M{"_id": 1}).Iter()
   500  	defer iter.Close()
   501  
   502  	logFormat := "deleted %d status history documents for " + fmt.Sprintf("%q", globalKey)
   503  	deleted, err := deleteInBatches(
   504  		stop,
   505  		history.Writeable().Underlying(), nil, "", iter,
   506  		logFormat, loggo.DEBUG,
   507  		noEarlyFinish,
   508  	)
   509  	if err != nil {
   510  		return errors.Trace(err)
   511  	}
   512  	if deleted > 0 {
   513  		logger.Debugf(logFormat, deleted)
   514  	}
   515  	return nil
   516  }
   517  
// statusHistoryArgs hold the arguments to call statusHistory: the database
// to read from, the global key of the entity whose history is wanted, the
// filter to apply (validated by statusHistory before use) and the clock
// used to evaluate the filter's Delta.
type statusHistoryArgs struct {
	db        Database
	globalKey string
	filter    status.StatusHistoryFilter
	clock     clock.Clock
}
   525  
   526  // fetchNStatusResults will return status for the given key filtered with the
   527  // given filter or error.
   528  func fetchNStatusResults(col mongo.Collection, clock clock.Clock,
   529  	key string, filter status.StatusHistoryFilter) ([]historicalStatusDoc, error) {
   530  	var (
   531  		docs  []historicalStatusDoc
   532  		query mongo.Query
   533  	)
   534  	baseQuery := bson.M{"globalkey": key}
   535  	if filter.Delta != nil {
   536  		delta := *filter.Delta
   537  		updated := clock.Now().Add(-delta)
   538  		baseQuery["updated"] = bson.M{"$gt": updated.UnixNano()}
   539  	}
   540  	if filter.FromDate != nil {
   541  		baseQuery["updated"] = bson.M{"$gt": filter.FromDate.UnixNano()}
   542  	}
   543  	excludes := []string{}
   544  	excludes = append(excludes, filter.Exclude.Values()...)
   545  	if len(excludes) > 0 {
   546  		baseQuery["statusinfo"] = bson.M{"$nin": excludes}
   547  	}
   548  
   549  	query = col.Find(baseQuery).Sort("-updated")
   550  	if filter.Size > 0 {
   551  		query = query.Limit(filter.Size)
   552  	}
   553  	err := query.All(&docs)
   554  
   555  	if err == mgo.ErrNotFound {
   556  		return []historicalStatusDoc{}, errors.NotFoundf("status history")
   557  	} else if err != nil {
   558  		return []historicalStatusDoc{}, errors.Annotatef(err, "cannot get status history")
   559  	}
   560  	return docs, nil
   561  
   562  }
   563  
   564  func statusHistory(args *statusHistoryArgs) ([]status.StatusInfo, error) {
   565  	if err := args.filter.Validate(); err != nil {
   566  		return nil, errors.Annotate(err, "validating arguments")
   567  	}
   568  	statusHistory, closer := args.db.GetCollection(statusesHistoryC)
   569  	defer closer()
   570  
   571  	var results []status.StatusInfo
   572  	docs, err := fetchNStatusResults(statusHistory, args.clock, args.globalKey, args.filter)
   573  	partial := []status.StatusInfo{}
   574  	if err != nil {
   575  		return []status.StatusInfo{}, errors.Trace(err)
   576  	}
   577  	for _, doc := range docs {
   578  		partial = append(partial, status.StatusInfo{
   579  			Status:  doc.Status,
   580  			Message: doc.StatusInfo,
   581  			Data:    utils.UnescapeKeys(doc.StatusData),
   582  			Since:   unixNanoToTime(doc.Updated),
   583  		})
   584  	}
   585  	results = partial
   586  	return results, nil
   587  }
   588  
   589  // PruneStatusHistory prunes the status history collection.
   590  func PruneStatusHistory(stop <-chan struct{}, st *State, maxHistoryTime time.Duration, maxHistoryMB int) error {
   591  	coll, closer := st.db().GetRawCollection(statusesHistoryC)
   592  	defer closer()
   593  
   594  	err := pruneCollection(stop, st, maxHistoryTime, maxHistoryMB, coll, "updated", nil, NanoSeconds)
   595  	return errors.Trace(err)
   596  }