github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/upgrade.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 /* 5 This file defines infrastructure for synchronising controller tools 6 upgrades. Synchronisation is handled via a mongo DB document in the 7 "upgradeInfo" collection. 8 9 The functionality here is intended to be used as follows: 10 11 1. When controllers come up running the new tools version, they call 12 EnsureUpgradeInfo before running upgrade steps. 13 14 2a. Any secondary controller watches the UpgradeInfo document and 15 waits for the status to change to UpgradeFinishing. 16 17 2b. The master controller watches the UpgradeInfo document and waits 18 for AllProvisionedControllersReady to return true. This indicates 19 that all provisioned controllers have called EnsureUpgradeInfo and 20 are ready to upgrade. 21 22 3. The master controller calls SetStatus with UpgradeRunning and 23 runs its upgrade steps. 24 25 4. The master controller calls SetStatus with UpgradeFinishing and 26 then calls SetControllerDone with it's own machine id. 27 28 5. Secondary controllers, seeing that the status has changed to 29 UpgradeFinishing, run their upgrade steps and then call 30 SetControllerDone when complete. 31 32 6. Once the final controller calls SetControllerDone, the status is 33 changed to UpgradeComplete and the upgradeInfo document is archived. 34 */ 35 36 package state 37 38 import ( 39 "time" 40 41 "github.com/juju/errors" 42 jujutxn "github.com/juju/txn" 43 "github.com/juju/utils/set" 44 "github.com/juju/version" 45 "gopkg.in/mgo.v2" 46 "gopkg.in/mgo.v2/bson" 47 "gopkg.in/mgo.v2/txn" 48 ) 49 50 // UpgradeStatus describes the states an upgrade operation may be in. 51 type UpgradeStatus string 52 53 const ( 54 // UpgradePending indicates that an upgrade is queued but not yet started. 55 UpgradePending UpgradeStatus = "pending" 56 57 // UpgradeRunning indicates that the master controller has started 58 // running upgrade logic, and other controllers are waiting for it. 59 UpgradeRunning UpgradeStatus = "running" 60 61 // UpgradeFinishing indicates that the master controller has finished 62 // running upgrade logic, and other controllers are catching up. 63 UpgradeFinishing UpgradeStatus = "finishing" 64 65 // UpgradeComplete indicates that all controllers have finished running 66 // upgrade logic. 67 UpgradeComplete UpgradeStatus = "complete" 68 69 // UpgradeAborted indicates that the upgrade wasn't completed due 70 // to some problem. 71 UpgradeAborted UpgradeStatus = "aborted" 72 73 // currentUpgradeId is the mongo _id of the current upgrade info document. 74 currentUpgradeId = "current" 75 ) 76 77 type upgradeInfoDoc struct { 78 Id string `bson:"_id"` 79 PreviousVersion version.Number `bson:"previousVersion"` 80 TargetVersion version.Number `bson:"targetVersion"` 81 Status UpgradeStatus `bson:"status"` 82 Started time.Time `bson:"started"` 83 ControllersReady []string `bson:"controllersReady"` 84 ControllersDone []string `bson:"controllersDone"` 85 } 86 87 // UpgradeInfo is used to synchronise controller upgrades. 88 type UpgradeInfo struct { 89 st *State 90 doc upgradeInfoDoc 91 } 92 93 // PreviousVersion returns the version being upgraded from. 94 func (info *UpgradeInfo) PreviousVersion() version.Number { 95 return info.doc.PreviousVersion 96 } 97 98 // TargetVersion returns the version being upgraded to. 99 func (info *UpgradeInfo) TargetVersion() version.Number { 100 return info.doc.TargetVersion 101 } 102 103 // Status returns the status of the upgrade. 104 func (info *UpgradeInfo) Status() UpgradeStatus { 105 return info.doc.Status 106 } 107 108 // Started returns the time at which the upgrade was started. 109 func (info *UpgradeInfo) Started() time.Time { 110 return info.doc.Started 111 } 112 113 // ControllersReady returns the machine ids for controllers that 114 // have signalled that they are ready for upgrade. 115 func (info *UpgradeInfo) ControllersReady() []string { 116 result := make([]string, len(info.doc.ControllersReady)) 117 copy(result, info.doc.ControllersReady) 118 return result 119 } 120 121 // ControllersDone returns the machine ids for controllers that 122 // have completed their upgrades. 123 func (info *UpgradeInfo) ControllersDone() []string { 124 result := make([]string, len(info.doc.ControllersDone)) 125 copy(result, info.doc.ControllersDone) 126 return result 127 } 128 129 // Refresh updates the contents of the UpgradeInfo from underlying state. 130 func (info *UpgradeInfo) Refresh() error { 131 doc, err := currentUpgradeInfoDoc(info.st) 132 if err != nil { 133 return errors.Trace(err) 134 } 135 info.doc = *doc 136 return nil 137 } 138 139 // Watch returns a watcher for the state underlying the current 140 // UpgradeInfo instance. This is provided purely for convenience. 141 func (info *UpgradeInfo) Watch() NotifyWatcher { 142 return info.st.WatchUpgradeInfo() 143 } 144 145 // AllProvisionedControllersReady returns true if and only if all controllers 146 // that have been started by the provisioner have called EnsureUpgradeInfo with 147 // matching versions. 148 // 149 // When this returns true the master state controller can begin it's 150 // own upgrade. 151 func (info *UpgradeInfo) AllProvisionedControllersReady() (bool, error) { 152 provisioned, err := info.getProvisionedControllers() 153 if err != nil { 154 return false, errors.Trace(err) 155 } 156 ready := set.NewStrings(info.doc.ControllersReady...) 157 missing := set.NewStrings(provisioned...).Difference(ready) 158 return missing.IsEmpty(), nil 159 } 160 161 func (info *UpgradeInfo) getProvisionedControllers() ([]string, error) { 162 var provisioned []string 163 164 controllerInfo, err := info.st.ControllerInfo() 165 if err != nil { 166 return provisioned, errors.Annotate(err, "cannot read controllers") 167 } 168 169 upgradeDone, err := info.isModelUUIDUpgradeDone() 170 if err != nil { 171 return provisioned, errors.Trace(err) 172 } 173 174 // Extract current and provisioned controllers. 175 instanceData, closer := info.st.getRawCollection(instanceDataC) 176 defer closer() 177 178 // If instanceData has the env UUID upgrade query using the 179 // machineid field, otherwise check using _id. 180 var sel bson.D 181 var field string 182 if upgradeDone { 183 sel = bson.D{{"model-uuid", info.st.ModelUUID()}} 184 field = "machineid" 185 } else { 186 field = "_id" 187 } 188 sel = append(sel, bson.DocElem{field, bson.D{{"$in", controllerInfo.MachineIds}}}) 189 iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter() 190 191 var doc bson.M 192 for iter.Next(&doc) { 193 provisioned = append(provisioned, doc[field].(string)) 194 } 195 if err := iter.Close(); err != nil { 196 return provisioned, errors.Annotate(err, "cannot read provisioned machines") 197 } 198 return provisioned, nil 199 } 200 201 func (info *UpgradeInfo) isModelUUIDUpgradeDone() (bool, error) { 202 instanceData, closer := info.st.getRawCollection(instanceDataC) 203 defer closer() 204 205 query := instanceData.Find(bson.D{{"model-uuid", bson.D{{"$exists", true}}}}) 206 n, err := query.Count() 207 if err != nil { 208 return false, errors.Annotatef(err, "couldn't query instance upgrade status") 209 } 210 return n > 0, nil 211 } 212 213 // SetStatus sets the status of the current upgrade. Checks are made 214 // to ensure that status changes are performed in the correct order. 215 func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error { 216 var assertSane bson.D 217 switch status { 218 case UpgradePending, UpgradeComplete, UpgradeAborted: 219 return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status) 220 case UpgradeRunning: 221 assertSane = bson.D{{"status", bson.D{{"$in", 222 []UpgradeStatus{UpgradePending, UpgradeRunning}, 223 }}}} 224 case UpgradeFinishing: 225 assertSane = bson.D{{"status", bson.D{{"$in", 226 []UpgradeStatus{UpgradeRunning, UpgradeFinishing}, 227 }}}} 228 default: 229 return errors.Errorf("unknown upgrade status: %s", status) 230 } 231 if info.doc.Id != currentUpgradeId { 232 return errors.New("cannot set status on non-current upgrade") 233 } 234 235 ops := []txn.Op{{ 236 C: upgradeInfoC, 237 Id: currentUpgradeId, 238 Assert: append(bson.D{{ 239 "previousVersion", info.doc.PreviousVersion, 240 }, { 241 "targetVersion", info.doc.TargetVersion, 242 }}, assertSane...), 243 Update: bson.D{{"$set", bson.D{{"status", status}}}}, 244 }} 245 err := info.st.runTransaction(ops) 246 if err == txn.ErrAborted { 247 return errors.Errorf("cannot set upgrade status to %q: Another "+ 248 "status change may have occurred concurrently", status) 249 } 250 return errors.Annotate(err, "cannot set upgrade status") 251 } 252 253 // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the 254 // supplied versions. If a matching upgrade is in progress, that upgrade is returned; 255 // if there's a mismatch, an error is returned. The supplied machine id must correspond 256 // to a current controller. 257 func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 258 259 assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion) 260 if err != nil { 261 return nil, errors.Trace(err) 262 } 263 264 doc := upgradeInfoDoc{ 265 Id: currentUpgradeId, 266 PreviousVersion: previousVersion, 267 TargetVersion: targetVersion, 268 Status: UpgradePending, 269 // TODO(fwereade): 2016-03-17 lp:1558657 270 Started: time.Now().UTC(), 271 ControllersReady: []string{machineId}, 272 } 273 274 machine, err := st.Machine(machineId) 275 if err != nil { 276 return nil, errors.Trace(err) 277 } 278 279 ops := []txn.Op{{ 280 C: upgradeInfoC, 281 Id: currentUpgradeId, 282 Assert: txn.DocMissing, 283 Insert: doc, 284 }, { 285 C: instanceDataC, 286 Id: machine.doc.DocID, 287 Assert: txn.DocExists, 288 }} 289 if err := st.runRawTransaction(ops); err == nil { 290 return &UpgradeInfo{st: st, doc: doc}, nil 291 } else if err != txn.ErrAborted { 292 return nil, errors.Annotate(err, "cannot create upgrade info") 293 } 294 295 if provisioned, err := st.isMachineProvisioned(machineId); err != nil { 296 return nil, errors.Trace(err) 297 } else if !provisioned { 298 return nil, errors.Errorf( 299 "machine %s is not provisioned and should not be participating in upgrades", 300 machineId) 301 } 302 303 if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil { 304 return info, nil 305 } else if errors.Cause(err) != errUpgradeInfoNotUpdated { 306 return nil, errors.Trace(err) 307 } 308 309 ops = []txn.Op{{ 310 C: upgradeInfoC, 311 Id: currentUpgradeId, 312 Assert: assertSanity, 313 Update: bson.D{{ 314 "$addToSet", bson.D{{"controllersReady", machineId}}, 315 }}, 316 }} 317 switch err := st.runTransaction(ops); err { 318 case nil: 319 return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion) 320 case txn.ErrAborted: 321 return nil, errors.New("upgrade info changed during update") 322 } 323 return nil, errors.Annotate(err, "cannot update upgrade info") 324 } 325 326 func (st *State) isMachineProvisioned(machineId string) (bool, error) { 327 instanceData, closer := st.getRawCollection(instanceDataC) 328 defer closer() 329 330 for _, id := range []string{st.docID(machineId), machineId} { 331 count, err := instanceData.FindId(id).Count() 332 if err != nil { 333 return false, errors.Annotate(err, "cannot read instance data") 334 } 335 if count > 0 { 336 return true, nil 337 } 338 } 339 return false, nil 340 } 341 342 var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated") 343 344 func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 345 var doc upgradeInfoDoc 346 if pdoc, err := currentUpgradeInfoDoc(st); err != nil { 347 return nil, errors.Trace(err) 348 } else { 349 doc = *pdoc 350 } 351 352 if doc.PreviousVersion != previousVersion { 353 return nil, errors.Errorf( 354 "current upgrade info mismatch: expected previous version %s, got %s", 355 previousVersion, doc.PreviousVersion) 356 } 357 if doc.TargetVersion != targetVersion { 358 return nil, errors.Errorf( 359 "current upgrade info mismatch: expected target version %s, got %s", 360 targetVersion, doc.TargetVersion) 361 } 362 363 controllersReady := set.NewStrings(doc.ControllersReady...) 364 if !controllersReady.Contains(machineId) { 365 return nil, errors.Trace(errUpgradeInfoNotUpdated) 366 } 367 return &UpgradeInfo{st: st, doc: doc}, nil 368 } 369 370 // SetControllerDone marks the supplied state machineId as having 371 // completed its upgrades. When SetControllerDone is called by the 372 // last provisioned controller, the current upgrade info document 373 // will be archived with a status of UpgradeComplete. 374 func (info *UpgradeInfo) SetControllerDone(machineId string) error { 375 assertSanity, err := checkUpgradeInfoSanity(info.st, machineId, 376 info.doc.PreviousVersion, info.doc.TargetVersion) 377 if err != nil { 378 return errors.Trace(err) 379 } 380 381 buildTxn := func(attempt int) ([]txn.Op, error) { 382 doc, err := currentUpgradeInfoDoc(info.st) 383 if errors.IsNotFound(err) { 384 return nil, jujutxn.ErrNoOperations 385 } else if err != nil { 386 return nil, errors.Trace(err) 387 } 388 switch doc.Status { 389 case UpgradePending, UpgradeRunning: 390 return nil, errors.New("upgrade has not yet run") 391 } 392 393 controllersDone := set.NewStrings(doc.ControllersDone...) 394 if controllersDone.Contains(machineId) { 395 return nil, jujutxn.ErrNoOperations 396 } 397 controllersDone.Add(machineId) 398 399 controllersReady := set.NewStrings(doc.ControllersReady...) 400 controllersNotDone := controllersReady.Difference(controllersDone) 401 if controllersNotDone.IsEmpty() { 402 // This is the last controller. Archive the current 403 // upgradeInfo document. 404 doc.ControllersDone = controllersDone.SortedValues() 405 return info.makeArchiveOps(doc, UpgradeComplete), nil 406 } 407 408 return []txn.Op{{ 409 C: upgradeInfoC, 410 Id: currentUpgradeId, 411 // This is not the last controller, but we need to be 412 // sure it still isn't when we run this. 413 Assert: append(assertSanity, bson.D{{ 414 "controllersDone", bson.D{{"$nin", controllersNotDone.Values()}}, 415 }}...), 416 Update: bson.D{{"$addToSet", bson.D{{"controllersDone", machineId}}}}, 417 }}, nil 418 } 419 err = info.st.run(buildTxn) 420 return errors.Annotate(err, "cannot complete upgrade") 421 } 422 423 // Abort marks the current upgrade as aborted. It should be called if 424 // the upgrade can't be completed for some reason. 425 func (info *UpgradeInfo) Abort() error { 426 buildTxn := func(attempt int) ([]txn.Op, error) { 427 doc, err := currentUpgradeInfoDoc(info.st) 428 if errors.IsNotFound(err) { 429 return nil, jujutxn.ErrNoOperations 430 } else if err != nil { 431 return nil, errors.Trace(err) 432 } 433 return info.makeArchiveOps(doc, UpgradeAborted), nil 434 } 435 err := info.st.run(buildTxn) 436 return errors.Annotate(err, "cannot abort upgrade") 437 } 438 439 func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op { 440 doc.Status = status 441 doc.Id = bson.NewObjectId().String() // change id to archive value 442 return []txn.Op{{ 443 C: upgradeInfoC, 444 Id: currentUpgradeId, 445 Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion), 446 Remove: true, 447 }, { 448 C: upgradeInfoC, 449 Id: doc.Id, 450 Assert: txn.DocMissing, 451 Insert: doc, 452 }} 453 } 454 455 // IsUpgrading returns true if an upgrade is currently in progress. 456 func (st *State) IsUpgrading() (bool, error) { 457 doc, err := currentUpgradeInfoDoc(st) 458 if doc != nil && err == nil { 459 return true, nil 460 } else if errors.IsNotFound(err) { 461 return false, nil 462 } else { 463 return false, errors.Trace(err) 464 } 465 } 466 467 // AbortCurrentUpgrade archives any current UpgradeInfo and sets its 468 // status to UpgradeAborted. Nothing happens if there's no current 469 // UpgradeInfo. 470 func (st *State) AbortCurrentUpgrade() error { 471 doc, err := currentUpgradeInfoDoc(st) 472 if err != nil { 473 if errors.IsNotFound(err) { 474 return nil 475 } 476 return errors.Trace(err) 477 } 478 info := &UpgradeInfo{st: st, doc: *doc} 479 return errors.Trace(info.Abort()) 480 481 } 482 483 func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) { 484 var doc upgradeInfoDoc 485 upgradeInfo, closer := st.getCollection(upgradeInfoC) 486 defer closer() 487 if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound { 488 return nil, errors.NotFoundf("current upgrade info") 489 } else if err != nil { 490 return nil, errors.Annotate(err, "cannot read upgrade info") 491 } 492 return &doc, nil 493 } 494 495 func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) { 496 if previousVersion.Compare(targetVersion) != -1 { 497 return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion) 498 } 499 controllerInfo, err := st.ControllerInfo() 500 if err != nil { 501 return nil, errors.Annotate(err, "cannot read controllers") 502 } 503 validIds := set.NewStrings(controllerInfo.MachineIds...) 504 if !validIds.Contains(machineId) { 505 return nil, errors.Errorf("machine %q is not a controller", machineId) 506 } 507 return assertExpectedVersions(previousVersion, targetVersion), nil 508 } 509 510 func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D { 511 return bson.D{{ 512 "previousVersion", previousVersion, 513 }, { 514 "targetVersion", targetVersion, 515 }} 516 } 517 518 // ClearUpgradeInfo clears information about an upgrade in progress. It returns 519 // an error if no upgrade is current. 520 func (st *State) ClearUpgradeInfo() error { 521 ops := []txn.Op{{ 522 C: upgradeInfoC, 523 Id: currentUpgradeId, 524 Assert: txn.DocExists, 525 Remove: true, 526 }} 527 err := st.runTransaction(ops) 528 return errors.Annotate(err, "cannot clear upgrade info") 529 }