// github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/upgrade.go (about)

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

/*
This file defines infrastructure for synchronising controller tools
upgrades. Synchronisation is handled via a mongo DB document in the
"upgradeInfo" collection.

The functionality here is intended to be used as follows:

1. When controllers come up running the new tools version, they call
EnsureUpgradeInfo before running upgrade steps.

2a. Any secondary controller watches the UpgradeInfo document and
waits for the status to change to UpgradeFinishing.

2b. The master controller watches the UpgradeInfo document and waits
for AllProvisionedControllersReady to return true. This indicates
that all provisioned controllers have called EnsureUpgradeInfo and
are ready to upgrade.

3. The master controller calls SetStatus with UpgradeRunning and
runs its upgrade steps.

4. The master controller calls SetStatus with UpgradeFinishing and
then calls SetControllerDone with its own machine id.

5. Secondary controllers, seeing that the status has changed to
UpgradeFinishing, run their upgrade steps and then call
SetControllerDone when complete.

6. Once the final controller calls SetControllerDone, the status is
changed to UpgradeComplete and the upgradeInfo document is archived.

NOTE(review): UpgradeFinishing is not one of the UpgradeStatus constants
declared in this file, and SetStatus only accepts UpgradeDBComplete and
UpgradeRunning, so the steps above that mention UpgradeFinishing may be
out of date - confirm against the callers.
*/

package state

import (
	"fmt"
	"time"

	"github.com/juju/collections/set"
	"github.com/juju/errors"
	"github.com/juju/mgo/v3"
	"github.com/juju/mgo/v3/bson"
	"github.com/juju/mgo/v3/txn"
	jujutxn "github.com/juju/txn/v3"
	"github.com/juju/version/v2"

	"github.com/juju/juju/core/status"
)

// UpgradeStatus describes the states an upgrade operation may be in.
type UpgradeStatus string

const (
	// UpgradePending indicates that an upgrade is queued but not yet started.
	UpgradePending UpgradeStatus = "pending"

	// UpgradeDBComplete indicates that the controller running the primary
	// MongoDB has completed running the database upgrade steps.
	UpgradeDBComplete UpgradeStatus = "db-complete"

	// UpgradeRunning indicates that at least one controller has begun running
	// upgrade steps.
	UpgradeRunning UpgradeStatus = "running"

	// UpgradeComplete indicates that all controllers have finished running
	// upgrade logic.
	UpgradeComplete UpgradeStatus = "complete"

	// UpgradeAborted indicates that the upgrade wasn't completed due
	// to some problem.
	UpgradeAborted UpgradeStatus = "aborted"

	// currentUpgradeId is the mongo _id of the current upgrade info document.
	currentUpgradeId = "current"
)

// upgradeInfoDoc is the document stored in the upgradeInfo collection.
type upgradeInfoDoc struct {
	// Id is currentUpgradeId for the live document; makeArchiveOps
	// replaces it with a freshly generated id when archiving.
	Id string `bson:"_id"`
	// PreviousVersion and TargetVersion are the versions being upgraded
	// from and to.
	PreviousVersion version.Number `bson:"previousVersion"`
	TargetVersion   version.Number `bson:"targetVersion"`
	// Status is the current state of the upgrade.
	Status UpgradeStatus `bson:"status"`
	// Started is set by EnsureUpgradeInfo when the document is created.
	Started time.Time `bson:"started"`
	// ControllersReady holds the ids added by EnsureUpgradeInfo.
	ControllersReady []string `bson:"controllersReady"`
	// ControllersDone holds the ids added by SetControllerDone.
	ControllersDone []string `bson:"controllersDone"`
}

// UpgradeInfo is used to synchronise controller upgrades.
type UpgradeInfo struct {
	// st is the State the document was read from and is written through.
	st *State
	// doc is a local snapshot of the document; Refresh re-reads it.
	doc upgradeInfoDoc
}

// PreviousVersion returns the version being upgraded from.
func (info *UpgradeInfo) PreviousVersion() version.Number {
	return info.doc.PreviousVersion
}

// TargetVersion returns the version being upgraded to.
func (info *UpgradeInfo) TargetVersion() version.Number {
	return info.doc.TargetVersion
}

// Status returns the status of the upgrade, as recorded in the local
// snapshot of the document.
func (info *UpgradeInfo) Status() UpgradeStatus {
	return info.doc.Status
}

// Started returns the time at which the upgrade was started.
112 func (info *UpgradeInfo) Started() time.Time { 113 return info.doc.Started 114 } 115 116 // ControllersReady returns the machine ids for controllers that 117 // have signalled that they are ready for upgrade. 118 func (info *UpgradeInfo) ControllersReady() []string { 119 result := make([]string, len(info.doc.ControllersReady)) 120 copy(result, info.doc.ControllersReady) 121 return result 122 } 123 124 // ControllersDone returns the machine ids for controllers that 125 // have completed their upgrades. 126 func (info *UpgradeInfo) ControllersDone() []string { 127 result := make([]string, len(info.doc.ControllersDone)) 128 copy(result, info.doc.ControllersDone) 129 return result 130 } 131 132 // Refresh updates the contents of the UpgradeInfo from underlying state. 133 func (info *UpgradeInfo) Refresh() error { 134 doc, err := currentUpgradeInfoDoc(info.st) 135 if err != nil { 136 return errors.Trace(err) 137 } 138 info.doc = *doc 139 return nil 140 } 141 142 // Watch returns a watcher for the state underlying the current 143 // UpgradeInfo instance. This is provided purely for convenience. 144 func (info *UpgradeInfo) Watch() NotifyWatcher { 145 return info.st.WatchUpgradeInfo() 146 } 147 148 // AllProvisionedControllersReady returns true if and only if all controllers 149 // that have been started by the provisioner have called EnsureUpgradeInfo with 150 // matching versions. 151 func (info *UpgradeInfo) AllProvisionedControllersReady() (bool, error) { 152 provisioned, err := info.getProvisionedControllers() 153 if err != nil { 154 return false, errors.Trace(err) 155 } 156 ready := set.NewStrings(info.doc.ControllersReady...) 
157 missing := set.NewStrings(provisioned...).Difference(ready) 158 return missing.IsEmpty(), nil 159 } 160 161 func (info *UpgradeInfo) getProvisionedControllers() ([]string, error) { 162 var provisioned []string 163 164 controllerIds, err := info.st.ControllerIds() 165 if err != nil { 166 return provisioned, errors.Annotate(err, "cannot read controllers") 167 } 168 169 // Extract current and provisioned controllers. 170 instanceData, closer := info.st.db().GetRawCollection(instanceDataC) 171 defer closer() 172 173 query := bson.D{ 174 {"model-uuid", info.st.ModelUUID()}, 175 {"machineid", bson.D{{"$in", controllerIds}}}, 176 } 177 iter := instanceData.Find(query).Select(bson.D{{"machineid", true}}).Iter() 178 179 var doc bson.M 180 for iter.Next(&doc) { 181 provisioned = append(provisioned, doc["machineid"].(string)) 182 } 183 if err := iter.Close(); err != nil { 184 return provisioned, errors.Annotate(err, "cannot read provisioned machines") 185 } 186 return provisioned, nil 187 } 188 189 // upgradeStatusHistoryAndOps sets the model's status history and returns ops for 190 // setting model status according to the UpgradeStatus. 
191 func upgradeStatusHistoryAndOps(mb modelBackend, upgradeStatus UpgradeStatus, now time.Time) ([]txn.Op, error) { 192 var modelStatus status.Status 193 var msg string 194 switch upgradeStatus { 195 case UpgradeComplete: 196 modelStatus = status.Available 197 msg = fmt.Sprintf("upgraded on %q", now.UTC().Format(time.RFC3339)) 198 case UpgradeRunning: 199 modelStatus = status.Busy 200 msg = fmt.Sprintf("upgrade in progress since %q", now.UTC().Format(time.RFC3339)) 201 case UpgradeAborted: 202 modelStatus = status.Available 203 msg = fmt.Sprintf("last upgrade aborted on %q", now.UTC().Format(time.RFC3339)) 204 default: 205 return []txn.Op{}, nil 206 } 207 doc := statusDoc{ 208 Status: modelStatus, 209 StatusInfo: msg, 210 Updated: now.UnixNano(), 211 } 212 ops, err := statusSetOps(mb.db(), doc, modelGlobalKey) 213 if err != nil { 214 return nil, errors.Trace(err) 215 } 216 _, _ = probablyUpdateStatusHistory(mb.db(), modelGlobalKey, doc) 217 return ops, nil 218 } 219 220 // SetStatus sets the status of the current upgrade. Checks are made 221 // to ensure that status changes are performed in the correct order. 
222 func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error { 223 var acceptableFromStatus []UpgradeStatus 224 switch status { 225 case UpgradePending, UpgradeComplete, UpgradeAborted: 226 return errors.Errorf("cannot explicitly set upgrade status to %q", status) 227 case UpgradeDBComplete: 228 acceptableFromStatus = []UpgradeStatus{UpgradePending, UpgradeDBComplete} 229 case UpgradeRunning: 230 acceptableFromStatus = []UpgradeStatus{UpgradeDBComplete, UpgradeRunning} 231 default: 232 return errors.Errorf("unknown upgrade status: %s", status) 233 } 234 if info.doc.Id != currentUpgradeId { 235 return errors.New("cannot set status on non-current upgrade") 236 } 237 238 assertSane := bson.D{{"status", bson.D{{"$in", acceptableFromStatus}}}} 239 240 buildTxn := func(attempt int) ([]txn.Op, error) { 241 doc, err := currentUpgradeInfoDoc(info.st) 242 if err != nil { 243 return nil, errors.Trace(err) 244 } 245 if doc.Status == status { 246 return nil, jujutxn.ErrNoOperations 247 } 248 249 var validFromStatus bool 250 for _, s := range acceptableFromStatus { 251 if s == doc.Status { 252 validFromStatus = true 253 break 254 } 255 } 256 if !validFromStatus { 257 return nil, errors.NotValidf("upgrade status transition from %q to %q", doc.Status, status) 258 } 259 260 if doc.PreviousVersion != info.doc.PreviousVersion { 261 return nil, errors.Errorf( 262 "current upgrade info mismatch: expected previous version %s, got %s", 263 info.doc.PreviousVersion, doc.PreviousVersion) 264 } 265 266 if doc.TargetVersion != info.doc.TargetVersion { 267 return nil, errors.Errorf( 268 "current upgrade info mismatch: expected target version %s, got %s", 269 info.doc.TargetVersion, doc.TargetVersion) 270 } 271 272 ops := []txn.Op{{ 273 C: upgradeInfoC, 274 Id: currentUpgradeId, 275 Assert: append(assertExpectedVersions(info.doc.PreviousVersion, info.doc.TargetVersion), assertSane...), 276 Update: bson.D{{"$set", bson.D{{"status", status}}}}, 277 }} 278 279 extraOps, err := 
upgradeStatusHistoryAndOps(info.st, status, info.st.clock().Now()) 280 if err != nil { 281 return nil, errors.Trace(err) 282 } 283 return append(ops, extraOps...), nil 284 } 285 286 return errors.Annotatef(info.st.db().Run(buildTxn), "setting upgrade status to %q", status) 287 } 288 289 // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the 290 // supplied versions. If a matching upgrade is in progress, that upgrade is returned; 291 // if there's a mismatch, an error is returned. 292 func (st *State) EnsureUpgradeInfo( 293 controllerId string, previousVersion, targetVersion version.Number, 294 ) (*UpgradeInfo, error) { 295 assertSanity, err := checkUpgradeInfoSanity(st, controllerId, previousVersion, targetVersion) 296 if err != nil { 297 return nil, errors.Trace(err) 298 } 299 300 doc := upgradeInfoDoc{ 301 Id: currentUpgradeId, 302 PreviousVersion: previousVersion, 303 TargetVersion: targetVersion, 304 Status: UpgradePending, 305 Started: st.clock().Now().UTC(), 306 ControllersReady: []string{controllerId}, 307 } 308 309 m, err := st.Model() 310 if err != nil { 311 return nil, errors.Trace(err) 312 } 313 hasMachine := m.Type() == ModelTypeIAAS 314 315 ops := []txn.Op{{ 316 C: upgradeInfoC, 317 Id: currentUpgradeId, 318 Assert: txn.DocMissing, 319 Insert: doc, 320 }} 321 if hasMachine { 322 machine, err := st.Machine(controllerId) 323 if err != nil { 324 return nil, errors.Trace(err) 325 } 326 327 ops = append(ops, txn.Op{ 328 C: instanceDataC, 329 Id: machine.doc.DocID, 330 Assert: txn.DocExists, 331 }) 332 } 333 if err := st.runRawTransaction(ops); err == nil { 334 return &UpgradeInfo{st: st, doc: doc}, nil 335 } else if err != txn.ErrAborted { 336 return nil, errors.Annotate(err, "cannot create upgrade info") 337 } 338 339 if hasMachine { 340 if provisioned, err := st.isMachineProvisioned(controllerId); err != nil { 341 return nil, errors.Trace(err) 342 } else if !provisioned { 343 return nil, errors.Errorf( 344 "machine %s is not 
provisioned and should not be participating in upgrades", 345 controllerId) 346 } 347 } 348 349 if info, err := ensureUpgradeInfoUpdated(st, controllerId, previousVersion, targetVersion); err == nil { 350 return info, nil 351 } else if errors.Cause(err) != errUpgradeInfoNotUpdated { 352 return nil, errors.Trace(err) 353 } 354 355 ops = []txn.Op{{ 356 C: upgradeInfoC, 357 Id: currentUpgradeId, 358 Assert: assertSanity, 359 Update: bson.D{{ 360 "$addToSet", bson.D{{"controllersReady", controllerId}}, 361 }}, 362 }} 363 switch err := st.db().RunTransaction(ops); err { 364 case nil: 365 return ensureUpgradeInfoUpdated(st, controllerId, previousVersion, targetVersion) 366 case txn.ErrAborted: 367 return nil, errors.New("upgrade info changed during update") 368 } 369 return nil, errors.Annotate(err, "cannot update upgrade info") 370 } 371 372 func (st *State) isMachineProvisioned(machineId string) (bool, error) { 373 instanceData, closer := st.db().GetRawCollection(instanceDataC) 374 defer closer() 375 376 for _, id := range []string{st.docID(machineId), machineId} { 377 count, err := instanceData.FindId(id).Count() 378 if err != nil { 379 return false, errors.Annotate(err, "cannot read instance data") 380 } 381 if count > 0 { 382 return true, nil 383 } 384 } 385 return false, nil 386 } 387 388 var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated") 389 390 func ensureUpgradeInfoUpdated(st *State, controllerId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 391 var doc upgradeInfoDoc 392 if pdoc, err := currentUpgradeInfoDoc(st); err != nil { 393 return nil, errors.Trace(err) 394 } else { 395 doc = *pdoc 396 } 397 398 if doc.PreviousVersion != previousVersion { 399 return nil, errors.Errorf( 400 "current upgrade info mismatch: expected previous version %s, got %s", 401 previousVersion, doc.PreviousVersion) 402 } 403 if doc.TargetVersion != targetVersion { 404 return nil, errors.Errorf( 405 "current upgrade info mismatch: 
expected target version %s, got %s", 406 targetVersion, doc.TargetVersion) 407 } 408 409 controllersReady := set.NewStrings(doc.ControllersReady...) 410 if !controllersReady.Contains(controllerId) { 411 return nil, errors.Trace(errUpgradeInfoNotUpdated) 412 } 413 return &UpgradeInfo{st: st, doc: doc}, nil 414 } 415 416 // SetControllerDone marks the supplied state controllerId as having 417 // completed its upgrades. When SetControllerDone is called by the 418 // last provisioned controller, the current upgrade info document 419 // will be archived with a status of UpgradeComplete. 420 func (info *UpgradeInfo) SetControllerDone(controllerId string) error { 421 assertSanity, err := checkUpgradeInfoSanity(info.st, controllerId, 422 info.doc.PreviousVersion, info.doc.TargetVersion) 423 if err != nil { 424 return errors.Trace(err) 425 } 426 427 buildTxn := func(attempt int) ([]txn.Op, error) { 428 doc, err := currentUpgradeInfoDoc(info.st) 429 if errors.IsNotFound(err) { 430 return nil, jujutxn.ErrNoOperations 431 } else if err != nil { 432 return nil, errors.Trace(err) 433 } 434 switch doc.Status { 435 case UpgradePending, UpgradeDBComplete: 436 return nil, errors.New("upgrade has not yet run") 437 } 438 439 controllersDone := set.NewStrings(doc.ControllersDone...) 440 if controllersDone.Contains(controllerId) { 441 return nil, jujutxn.ErrNoOperations 442 } 443 controllersDone.Add(controllerId) 444 445 controllersReady := set.NewStrings(doc.ControllersReady...) 446 controllersNotDone := controllersReady.Difference(controllersDone) 447 if controllersNotDone.IsEmpty() { 448 // This is the last controller. Archive the current 449 // upgradeInfo document. 
450 doc.ControllersDone = controllersDone.SortedValues() 451 452 ops := info.makeArchiveOps(doc, UpgradeComplete) 453 extraOps, err := upgradeStatusHistoryAndOps(info.st, UpgradeComplete, info.st.clock().Now()) 454 if err != nil { 455 return nil, errors.Trace(err) 456 } 457 if len(extraOps) > 0 { 458 ops = append(ops, extraOps...) 459 } 460 461 return ops, nil 462 } 463 464 return []txn.Op{{ 465 C: upgradeInfoC, 466 Id: currentUpgradeId, 467 // This is not the last controller, but we need to be 468 // sure it still isn't when we run this. 469 Assert: append(assertSanity, bson.D{{ 470 "controllersDone", bson.D{{"$nin", controllersNotDone.Values()}}, 471 }}...), 472 Update: bson.D{{"$addToSet", bson.D{{"controllersDone", controllerId}}}}, 473 }}, nil 474 } 475 err = info.st.db().Run(buildTxn) 476 return errors.Annotate(err, "cannot complete upgrade") 477 } 478 479 // Abort marks the current upgrade as aborted. It should be called if 480 // the upgrade can't be completed for some reason. 481 func (info *UpgradeInfo) Abort() error { 482 buildTxn := func(attempt int) ([]txn.Op, error) { 483 doc, err := currentUpgradeInfoDoc(info.st) 484 if errors.IsNotFound(err) { 485 return nil, jujutxn.ErrNoOperations 486 } else if err != nil { 487 return nil, errors.Trace(err) 488 } 489 ops := info.makeArchiveOps(doc, UpgradeAborted) 490 extraOps, err := upgradeStatusHistoryAndOps(info.st, UpgradeAborted, info.st.clock().Now()) 491 if err != nil { 492 return nil, errors.Trace(err) 493 } 494 if len(extraOps) > 0 { 495 ops = append(ops, extraOps...) 
496 } 497 498 return ops, nil 499 } 500 err := info.st.db().Run(buildTxn) 501 return errors.Annotate(err, "cannot abort upgrade") 502 } 503 504 func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op { 505 doc.Status = status 506 doc.Id = bson.NewObjectId().String() // change id to archive value 507 return []txn.Op{{ 508 C: upgradeInfoC, 509 Id: currentUpgradeId, 510 Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion), 511 Remove: true, 512 }, { 513 C: upgradeInfoC, 514 Id: doc.Id, 515 Assert: txn.DocMissing, 516 Insert: doc, 517 }} 518 } 519 520 // IsUpgrading returns true if an upgrade is currently in progress. 521 func (st *State) IsUpgrading() (bool, error) { 522 doc, err := currentUpgradeInfoDoc(st) 523 if doc != nil && err == nil { 524 return true, nil 525 } else if errors.IsNotFound(err) { 526 return false, nil 527 } else { 528 return false, errors.Trace(err) 529 } 530 } 531 532 // AbortCurrentUpgrade archives any current UpgradeInfo and sets its 533 // status to UpgradeAborted. Nothing happens if there's no current 534 // UpgradeInfo. 
535 func (st *State) AbortCurrentUpgrade() error { 536 doc, err := currentUpgradeInfoDoc(st) 537 if err != nil { 538 if errors.IsNotFound(err) { 539 return nil 540 } 541 return errors.Trace(err) 542 } 543 info := &UpgradeInfo{st: st, doc: *doc} 544 return errors.Trace(info.Abort()) 545 546 } 547 548 func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) { 549 var doc upgradeInfoDoc 550 upgradeInfo, closer := st.db().GetCollection(upgradeInfoC) 551 defer closer() 552 if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound { 553 return nil, errors.NotFoundf("current upgrade info") 554 } else if err != nil { 555 return nil, errors.Annotate(err, "cannot read upgrade info") 556 } 557 return &doc, nil 558 } 559 560 func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) { 561 if previousVersion.Compare(targetVersion) != -1 { 562 return nil, errors.Errorf("cannot upgrade from %s to %s", previousVersion, targetVersion) 563 } 564 controllerIds, err := st.SafeControllerIds() 565 if err != nil { 566 return nil, errors.Annotate(err, "cannot read controller ids") 567 } 568 validIds := set.NewStrings(controllerIds...) 569 if !validIds.Contains(machineId) { 570 return nil, errors.Errorf("machine %q is not a controller", machineId) 571 } 572 return assertExpectedVersions(previousVersion, targetVersion), nil 573 } 574 575 func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D { 576 return bson.D{{ 577 "previousVersion", previousVersion, 578 }, { 579 "targetVersion", targetVersion, 580 }} 581 } 582 583 // ClearUpgradeInfo clears information about an upgrade in progress. It returns 584 // an error if no upgrade is current. 
585 func (st *State) ClearUpgradeInfo() error { 586 ops := []txn.Op{{ 587 C: upgradeInfoC, 588 Id: currentUpgradeId, 589 Assert: txn.DocExists, 590 Remove: true, 591 }} 592 err := st.db().RunTransaction(ops) 593 return errors.Annotate(err, "cannot clear upgrade info") 594 }