// Source: github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/status.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"fmt"
	"reflect"
	"time"

	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/mgo/v3"
	"github.com/juju/mgo/v3/bson"
	"github.com/juju/mgo/v3/txn"
	jujutxn "github.com/juju/txn/v3"

	"github.com/juju/juju/core/leadership"
	"github.com/juju/juju/core/status"
	"github.com/juju/juju/mongo"
	"github.com/juju/juju/mongo/utils"
)

// displayStatusFunc is the signature of a function that derives a single
// status from a unit's status, its container status and whether a workload
// is expected. It is the type of the displayStatus argument accepted by
// caasHistoryRewriteDoc.
type displayStatusFunc func(unitStatus status.StatusInfo, containerStatus status.StatusInfo, expectWorkload bool) status.StatusInfo

// ModelStatus holds all the current status values for a given model
// and offers accessors for the various parts of a model.
type ModelStatus struct {
	// model is the model the status documents were loaded for.
	model *Model
	// docs holds the loaded status documents, keyed by the model-local
	// form of each document's ID (see LoadModelStatus).
	docs map[string]statusDocWithID
}

// LoadModelStatus retrieves all the status documents for the model
// at once. Used primarily to speed up status.
36 func (m *Model) LoadModelStatus() (*ModelStatus, error) { 37 statuses, closer := m.st.db().GetCollection(statusesC) 38 defer closer() 39 40 var docs []statusDocWithID 41 err := statuses.Find(nil).All(&docs) 42 if err != nil { 43 return nil, errors.Annotate(err, "failed to read status collection") 44 } 45 46 result := &ModelStatus{ 47 model: m, 48 docs: make(map[string]statusDocWithID), 49 } 50 for _, doc := range docs { 51 id := m.localID(doc.ID) 52 result.docs[id] = doc 53 } 54 55 return result, nil 56 } 57 58 func (m *ModelStatus) getDoc(key, badge string) (statusDocWithID, error) { 59 doc, found := m.docs[key] 60 if !found { 61 return statusDocWithID{}, errors.Annotate(errors.NotFoundf(badge), "cannot get status") 62 } 63 return doc, nil 64 } 65 66 func (m *ModelStatus) getStatus(key, badge string) (status.StatusInfo, error) { 67 doc, err := m.getDoc(key, badge) 68 if err != nil { 69 return status.StatusInfo{}, err 70 } 71 return doc.asStatusInfo(), nil 72 } 73 74 // Model returns the status of the model. 75 func (m *ModelStatus) Model() (status.StatusInfo, error) { 76 return m.getStatus(m.model.globalKey(), "model") 77 } 78 79 // MachineAgent returns the status of the machine agent. 80 func (m *ModelStatus) MachineAgent(machineID string) (status.StatusInfo, error) { 81 return m.getStatus(machineGlobalKey(machineID), "machine") 82 } 83 84 // MachineInstance returns the status of the machine instance. 85 func (m *ModelStatus) MachineInstance(machineID string) (status.StatusInfo, error) { 86 return m.getStatus(machineGlobalInstanceKey(machineID), "instance") 87 } 88 89 // MachineModification returns the status of the machine modification 90 func (m *ModelStatus) MachineModification(machineID string) (status.StatusInfo, error) { 91 return m.getStatus(machineGlobalModificationKey(machineID), "modification") 92 } 93 94 // FullUnitWorkloadVersion returns the full status info for the workload 95 // version of a unit. 
This is used for selecting the workload version for 96 // an application. 97 func (m *ModelStatus) FullUnitWorkloadVersion(unitName string) (status.StatusInfo, error) { 98 return m.getStatus(globalWorkloadVersionKey(unitName), "workload") 99 } 100 101 // UnitWorkloadVersion returns workload version for the unit 102 func (m *ModelStatus) UnitWorkloadVersion(unitName string) (string, error) { 103 info, err := m.getStatus(globalWorkloadVersionKey(unitName), "workload") 104 if err != nil { 105 return "", err 106 } 107 return info.Message, nil 108 } 109 110 // UnitAgent returns the status of the Unit's agent. 111 func (m *ModelStatus) UnitAgent(unitName string) (status.StatusInfo, error) { 112 // We do horrible things with unit status. 113 // See notes in unitagent.go. 114 info, err := m.getStatus(unitAgentGlobalKey(unitName), "agent") 115 if err != nil { 116 return info, err 117 } 118 if info.Status == status.Error { 119 return status.StatusInfo{ 120 Status: status.Idle, 121 Message: "", 122 Data: map[string]interface{}{}, 123 Since: info.Since, 124 }, nil 125 } 126 return info, nil 127 } 128 129 // UnitWorkload returns the status of the unit's workload. 130 func (m *ModelStatus) UnitWorkload(unitName string, expectWorkload bool) (status.StatusInfo, error) { 131 // We do horrible things with unit status. 132 // See notes in unit.go. 
133 info, err := m.getStatus(unitAgentGlobalKey(unitName), "unit") 134 if err != nil { 135 return info, err 136 } else if info.Status == status.Error { 137 return info, nil 138 } 139 140 // (for CAAS models) Use cloud container status over unit if the cloud 141 // container status is error or active or the unit status hasn't shifted 142 // from 'allocating' 143 info, err = m.getStatus(unitGlobalKey(unitName), "workload") 144 if err != nil { 145 return info, errors.Trace(err) 146 } 147 148 if m.model.Type() == ModelTypeIAAS { 149 return info, nil 150 } 151 152 containerInfo, err := m.getStatus(globalCloudContainerKey(unitName), "cloud container") 153 if err != nil && !errors.IsNotFound(err) { 154 return info, err 155 } 156 return status.UnitDisplayStatus(info, containerInfo, expectWorkload), nil 157 } 158 159 // caasHistoryRewriteDoc determines which status should be stored as history. 160 func caasHistoryRewriteDoc(jujuStatus, caasStatus status.StatusInfo, expectWorkload bool, displayStatus displayStatusFunc, clock clock.Clock) (*statusDoc, error) { 161 modifiedStatus := displayStatus(jujuStatus, caasStatus, expectWorkload) 162 if modifiedStatus.Status == jujuStatus.Status && modifiedStatus.Message == jujuStatus.Message { 163 return nil, nil 164 } 165 return &statusDoc{ 166 Status: modifiedStatus.Status, 167 StatusInfo: modifiedStatus.Message, 168 StatusData: utils.EscapeKeys(modifiedStatus.Data), 169 Updated: timeOrNow(modifiedStatus.Since, clock).UnixNano(), 170 }, nil 171 } 172 173 type statusDocWithID struct { 174 ID string `bson:"_id"` 175 ModelUUID string `bson:"model-uuid"` 176 Status status.Status `bson:"status"` 177 StatusInfo string `bson:"statusinfo"` 178 StatusData map[string]interface{} `bson:"statusdata"` 179 Updated int64 `bson:"updated"` 180 } 181 182 func (doc *statusDocWithID) asStatusInfo() status.StatusInfo { 183 return status.StatusInfo{ 184 Status: doc.Status, 185 Message: doc.StatusInfo, 186 Data: utils.UnescapeKeys(doc.StatusData), 187 
Since: unixNanoToTime(doc.Updated), 188 } 189 } 190 191 // statusDoc represents a entity status in Mongodb. The implicit 192 // _id field is explicitly set to the global key of the associated 193 // entity in the document's creation transaction, but omitted to allow 194 // direct use of the document in both create and update transactions. 195 type statusDoc struct { 196 ModelUUID string `bson:"model-uuid"` 197 Status status.Status `bson:"status"` 198 StatusInfo string `bson:"statusinfo"` 199 StatusData map[string]interface{} `bson:"statusdata"` 200 201 // Updated used to be a *time.Time that was not present on statuses dating 202 // from older versions of juju so this might be 0 for those cases. 203 Updated int64 `bson:"updated"` 204 } 205 206 func (doc *statusDoc) asStatusInfo() status.StatusInfo { 207 return status.StatusInfo{ 208 Status: doc.Status, 209 Message: doc.StatusInfo, 210 Data: utils.UnescapeKeys(doc.StatusData), 211 Since: unixNanoToTime(doc.Updated), 212 } 213 } 214 215 func unixNanoToTime(i int64) *time.Time { 216 t := time.Unix(0, i) 217 return &t 218 } 219 220 // getStatus retrieves the status document associated with the given 221 // globalKey and converts it to a StatusInfo. If the status document 222 // is not found, a NotFoundError referencing badge will be returned. 
func getStatus(db Database, globalKey, badge string) (_ status.StatusInfo, err error) {
	// Every error leaving this function is prefixed with "cannot get status".
	defer errors.DeferredAnnotatef(&err, "cannot get status")
	statuses, closer := db.GetCollection(statusesC)
	defer closer()

	var doc statusDoc
	err = statuses.FindId(globalKey).One(&doc)
	if err == mgo.ErrNotFound {
		return status.StatusInfo{}, errors.NotFoundf(badge)
	} else if err != nil {
		return status.StatusInfo{}, errors.Trace(err)
	}

	return doc.asStatusInfo(), nil
}

// getEntityKeysForStatus returns the model-local IDs (as produced by
// mb.localID) of all status documents whose _id matches the pattern
// ".+:<keyType>#.+" and whose status field equals the given status.
//
// NOTE(review): keyType is interpolated into the regular expression
// unescaped — presumably callers only pass fixed key-type prefixes; confirm.
func getEntityKeysForStatus(mb modelBackend, keyType string, status status.Status) ([]string, error) {
	statuses, closer := mb.db().GetCollection(statusesC)
	defer closer()

	var ids []bson.M
	query := bson.D{
		{"_id", bson.D{{"$regex", fmt.Sprintf(".+\\:%s#.+", keyType)}}},
		{"status", status},
	}
	// Only the _id field is needed, so select just that.
	err := statuses.Find(query).Select(bson.D{{"_id", 1}}).All(&ids)
	if err != nil {
		return nil, errors.Trace(err)
	}

	keys := make([]string, len(ids))
	for i, id := range ids {
		// The unchecked assertion panics if an _id is not a string.
		keys[i] = mb.localID(id["_id"].(string))
	}
	return keys, nil
}

// setStatusParams configures a setStatus call. All parameters are presumed to
// be set to valid values unless otherwise noted.
type setStatusParams struct {

	// badge is used to specialize any NotFound error emitted.
	badge string

	// globalKey uniquely identifies the entity to which the status applies.
	globalKey string

	// status is the status value.
	status status.Status

	// message is an optional string elaborating upon the status.
	message string

	// rawData is a map of arbitrary data elaborating upon the status and
	// message. Its keys are assumed not to have been escaped.
	rawData map[string]interface{}

	// token, if present, must accept an *[]txn.Op passed to its Check method,
	// and will prevent any change if it becomes invalid.
	token leadership.Token

	// updated is the time the status was set. setStatus rejects a nil value
	// with a NotValid error.
	updated *time.Time

	// historyOverwrite provides an optional ability to write a different
	// version of status as history (vs. what status actually gets set.)
	// Used only with caas models as there is currently no way for a charm
	// to query its workload and the cloud container status might contradict
	// what it thinks it is.
	historyOverwrite *statusDoc
}

// timeOrNow returns t unchanged when it is non-nil, and otherwise a pointer
// to the current time as reported by clock.
func timeOrNow(t *time.Time, clock clock.Clock) *time.Time {
	if t == nil {
		now := clock.Now()
		t = &now
	}
	return t
}

// setStatus interprets the supplied params as documented on the type.
// It first records the status in the status history collection and then
// updates the authoritative status document in a transaction. Every error
// returned is prefixed with "cannot set status".
func setStatus(db Database, params setStatusParams) (err error) {
	defer errors.DeferredAnnotatef(&err, "cannot set status")
	if params.updated == nil {
		return errors.NotValidf("nil updated time")
	}

	doc := statusDoc{
		Status:     params.status,
		StatusInfo: params.message,
		StatusData: utils.EscapeKeys(params.rawData),
		Updated:    params.updated.UnixNano(),
	}

	// History normally records the same document that is being set, unless
	// the caller supplied a different one to record.
	historyDoc := &doc
	if params.historyOverwrite != nil {
		historyDoc = params.historyOverwrite
	}

	// A failure to write history is not returned from here; it only stops
	// the early return below from being taken, so the status is still set.
	newStatus, historyErr := probablyUpdateStatusHistory(db, params.globalKey, *historyDoc)
	if params.historyOverwrite == nil && (!newStatus && historyErr == nil) {
		// If this status is not new (i.e. it is exactly the same as
		// our last status), there is no need to update the record.
		// Update here will only reset the 'Since' field.
		return nil
	}

	// Set the authoritative status document, or fail trying.
	var buildTxn jujutxn.TransactionSource = func(int) ([]txn.Op, error) {
		return statusSetOps(db, doc, params.globalKey)
	}
	if params.token != nil {
		// Wrap the transaction so it is subject to the leadership token.
		buildTxn = buildTxnWithLeadership(buildTxn, params.token)
	}
	err = db.Run(buildTxn)
	if cause := errors.Cause(err); cause == mgo.ErrNotFound {
		return errors.NotFoundf(params.badge)
	}
	return errors.Trace(err)
}

// statusSetOps returns the operations needed to overwrite the fields of the
// status document stored under globalKey with the contents of doc. The
// operation asserts that the document's txn-revno still equals the value
// read here via readTxnRevno.
//
// NOTE(review): unlike createStatusOp and removeStatusOp, the Id used here
// is the raw globalKey rather than mb.docID(globalKey) — presumably the
// Database runner adds the model prefix; confirm.
func statusSetOps(db Database, doc statusDoc, globalKey string) ([]txn.Op, error) {
	update := bson.D{{"$set", &doc}}
	txnRevno, err := readTxnRevno(db, statusesC, globalKey)
	if err != nil {
		return nil, errors.Trace(err)
	}
	assert := bson.D{{"txn-revno", txnRevno}}
	return []txn.Op{{
		C:      statusesC,
		Id:     globalKey,
		Assert: assert,
		Update: update,
	}}, nil
}

// createStatusOp returns the operation needed to create the given status
// document associated with the given globalKey. The operation asserts that
// no document with that ID exists yet.
func createStatusOp(mb modelBackend, globalKey string, doc statusDoc) txn.Op {
	return txn.Op{
		C:      statusesC,
		Id:     mb.docID(globalKey),
		Assert: txn.DocMissing,
		Insert: &doc,
	}
}

// removeStatusOp returns the operation needed to remove the status
// document associated with the given globalKey.
func removeStatusOp(mb modelBackend, globalKey string) txn.Op {
	return txn.Op{
		C:      statusesC,
		Id:     mb.docID(globalKey),
		Remove: true,
	}
}

// globalKeyField must have the same value as the tag for
// historicalStatusDoc.GlobalKey.
const globalKeyField = "globalkey"

// historicalStatusDoc is the document shape written to, and read back from,
// the status history collection.
type historicalStatusDoc struct {
	ModelUUID  string                 `bson:"model-uuid"`
	GlobalKey  string                 `bson:"globalkey"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`

	// Updated might not be present on statuses copied by old
	// versions of juju from yet older versions of juju.
	Updated int64 `bson:"updated"`
}

// recordedHistoricalStatusDoc holds the fields of a stored history record
// that statusHistoryExists compares against a new status, together with the
// record's _id so that the record can be updated in place.
type recordedHistoricalStatusDoc struct {
	ID         bson.ObjectId          `bson:"_id"`
	Status     status.Status          `bson:"status"`
	StatusInfo string                 `bson:"statusinfo"`
	StatusData map[string]interface{} `bson:"statusdata"`
}

// probablyUpdateStatusHistory inspects existing status-history
// and determines if this status is new or the same as the last recorded.
// If this is a new status, a new status history record will be added.
// If this status is the same as the last status we've received,
// we update that record to have a new timestamp.
// Status messages are considered to be the same if they only differ in their timestamps.
// The call returns true if a new status history record has been created.
// Write failures are logged and also returned, with a false result.
func probablyUpdateStatusHistory(db Database, globalKey string, doc statusDoc) (bool, error) {
	historyDoc := &historicalStatusDoc{
		Status:     doc.Status,
		StatusInfo: doc.StatusInfo,
		StatusData: doc.StatusData, // coming from a statusDoc, already escaped
		Updated:    doc.Updated,
		GlobalKey:  globalKey,
	}
	history, closer := db.GetCollection(statusesHistoryC)
	defer closer()

	exists, currentID := statusHistoryExists(db, historyDoc)
	if exists {
		// If the status values have not changed since the last run,
		// update history record with this timestamp
		// to keep correct track of when SetStatus ran.
		historyW := history.Writeable()
		err := historyW.Update(
			bson.D{{"_id", currentID}},
			bson.D{{"$set", bson.D{{"updated", doc.Updated}}}})
		if err != nil {
			logger.Errorf("failed to update status history: %v", err)
			return false, err
		}
		return false, nil
	}

	// No matching latest record: append a new history record.
	historyW := history.Writeable()
	err := historyW.Insert(historyDoc)
	if err != nil {
		logger.Errorf("failed to write status history: %v", err)
		return false, err
	}
	return true, nil
}

// statusHistoryExists reports whether the most recently updated history
// record for historyDoc.GlobalKey has the same status, message and data as
// historyDoc, and if so returns that record's ID. A query error, or the
// absence of any record, is reported as (false, "").
func statusHistoryExists(db Database, historyDoc *historicalStatusDoc) (bool, bson.ObjectId) {
	// Find the current value to see if it is worthwhile adding the new
	// status value.
	history, closer := db.GetCollection(statusesHistoryC)
	defer closer()

	var latest []recordedHistoricalStatusDoc
	query := history.Find(bson.D{{globalKeyField, historyDoc.GlobalKey}})
	// Newest record first, and only that one.
	query = query.Sort("-updated").Limit(1)
	err := query.All(&latest)
	if err == nil && len(latest) == 1 {
		current := latest[0]
		// Short circuit the writing to the DB if the status, message,
		// and data match.
		dataSame := func(left, right map[string]interface{}) bool {
			// If they are both empty, then it is the same.
			if len(left) == 0 && len(right) == 0 {
				return true
			}
			// If either are now empty, they aren't the same.
			if len(left) == 0 || len(right) == 0 {
				return false
			}
			// Failing that, use reflect.
			return reflect.DeepEqual(left, right)
		}
		// Check the data last as the short circuit evaluation may mean
		// we rarely need to drop down into the reflect library.
		if current.Status == historyDoc.Status &&
			current.StatusInfo == historyDoc.StatusInfo &&
			dataSame(current.StatusData, historyDoc.StatusData) {
			return true, current.ID
		}
	}
	return false, ""
}

// eraseStatusHistory removes all status history documents for
// the given global key.
The documents are removed in batches 485 // to avoid locking the status history collection for extended 486 // periods of time, preventing status history being recorded 487 // for other entities. 488 func eraseStatusHistory(stop <-chan struct{}, mb modelBackend, globalKey string) error { 489 // TODO(axw) restructure status history so we have one 490 // document per global key, and sub-documents per status 491 // recording. This method would then become a single 492 // Remove operation. 493 494 history, closer := mb.db().GetCollection(statusesHistoryC) 495 defer closer() 496 497 iter := history.Find(bson.D{{ 498 globalKeyField, globalKey, 499 }}).Select(bson.M{"_id": 1}).Iter() 500 defer iter.Close() 501 502 logFormat := "deleted %d status history documents for " + fmt.Sprintf("%q", globalKey) 503 deleted, err := deleteInBatches( 504 stop, 505 history.Writeable().Underlying(), nil, "", iter, 506 logFormat, loggo.DEBUG, 507 noEarlyFinish, 508 ) 509 if err != nil { 510 return errors.Trace(err) 511 } 512 if deleted > 0 { 513 logger.Debugf(logFormat, deleted) 514 } 515 return nil 516 } 517 518 // statusHistoryArgs hold the arguments to call statusHistory. 519 type statusHistoryArgs struct { 520 db Database 521 globalKey string 522 filter status.StatusHistoryFilter 523 clock clock.Clock 524 } 525 526 // fetchNStatusResults will return status for the given key filtered with the 527 // given filter or error. 
528 func fetchNStatusResults(col mongo.Collection, clock clock.Clock, 529 key string, filter status.StatusHistoryFilter) ([]historicalStatusDoc, error) { 530 var ( 531 docs []historicalStatusDoc 532 query mongo.Query 533 ) 534 baseQuery := bson.M{"globalkey": key} 535 if filter.Delta != nil { 536 delta := *filter.Delta 537 updated := clock.Now().Add(-delta) 538 baseQuery["updated"] = bson.M{"$gt": updated.UnixNano()} 539 } 540 if filter.FromDate != nil { 541 baseQuery["updated"] = bson.M{"$gt": filter.FromDate.UnixNano()} 542 } 543 excludes := []string{} 544 excludes = append(excludes, filter.Exclude.Values()...) 545 if len(excludes) > 0 { 546 baseQuery["statusinfo"] = bson.M{"$nin": excludes} 547 } 548 549 query = col.Find(baseQuery).Sort("-updated") 550 if filter.Size > 0 { 551 query = query.Limit(filter.Size) 552 } 553 err := query.All(&docs) 554 555 if err == mgo.ErrNotFound { 556 return []historicalStatusDoc{}, errors.NotFoundf("status history") 557 } else if err != nil { 558 return []historicalStatusDoc{}, errors.Annotatef(err, "cannot get status history") 559 } 560 return docs, nil 561 562 } 563 564 func statusHistory(args *statusHistoryArgs) ([]status.StatusInfo, error) { 565 if err := args.filter.Validate(); err != nil { 566 return nil, errors.Annotate(err, "validating arguments") 567 } 568 statusHistory, closer := args.db.GetCollection(statusesHistoryC) 569 defer closer() 570 571 var results []status.StatusInfo 572 docs, err := fetchNStatusResults(statusHistory, args.clock, args.globalKey, args.filter) 573 partial := []status.StatusInfo{} 574 if err != nil { 575 return []status.StatusInfo{}, errors.Trace(err) 576 } 577 for _, doc := range docs { 578 partial = append(partial, status.StatusInfo{ 579 Status: doc.Status, 580 Message: doc.StatusInfo, 581 Data: utils.UnescapeKeys(doc.StatusData), 582 Since: unixNanoToTime(doc.Updated), 583 }) 584 } 585 results = partial 586 return results, nil 587 } 588 589 // PruneStatusHistory prunes the status history 
collection. 590 func PruneStatusHistory(stop <-chan struct{}, st *State, maxHistoryTime time.Duration, maxHistoryMB int) error { 591 coll, closer := st.db().GetRawCollection(statusesHistoryC) 592 defer closer() 593 594 err := pruneCollection(stop, st, maxHistoryTime, maxHistoryMB, coll, "updated", nil, NanoSeconds) 595 return errors.Trace(err) 596 }