github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/upgrade.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 /* 5 This file defines infrastructure for synchronising controller tools 6 upgrades. Synchronisation is handled via a mongo DB document in the 7 "upgradeInfo" collection. 8 9 The functionality here is intended to be used as follows: 10 11 1. When controllers come up running the new tools version, they call 12 EnsureUpgradeInfo before running upgrade steps. 13 14 2a. Any secondary controller watches the UpgradeInfo document and 15 waits for the status to change to UpgradeFinishing. 16 17 2b. The master controller watches the UpgradeInfo document and waits 18 for AllProvisionedControllersReady to return true. This indicates 19 that all provisioned controllers have called EnsureUpgradeInfo and 20 are ready to upgrade. 21 22 3. The master controller calls SetStatus with UpgradeRunning and 23 runs its upgrade steps. 24 25 4. The master controller calls SetStatus with UpgradeFinishing and 26 then calls SetControllerDone with it's own machine id. 27 28 5. Secondary controllers, seeing that the status has changed to 29 UpgradeFinishing, run their upgrade steps and then call 30 SetControllerDone when complete. 31 32 6. Once the final controller calls SetControllerDone, the status is 33 changed to UpgradeComplete and the upgradeInfo document is archived. 34 */ 35 36 package state 37 38 import ( 39 "time" 40 41 "github.com/juju/errors" 42 jujutxn "github.com/juju/txn" 43 "github.com/juju/utils/set" 44 "github.com/juju/version" 45 "gopkg.in/mgo.v2" 46 "gopkg.in/mgo.v2/bson" 47 "gopkg.in/mgo.v2/txn" 48 ) 49 50 // UpgradeStatus describes the states an upgrade operation may be in. 51 type UpgradeStatus string 52 53 const ( 54 // UpgradePending indicates that an upgrade is queued but not yet started. 55 UpgradePending UpgradeStatus = "pending" 56 57 // UpgradeRunning indicates that the master controller has started 58 // running upgrade logic, and other controllers are waiting for it. 59 UpgradeRunning UpgradeStatus = "running" 60 61 // UpgradeFinishing indicates that the master controller has finished 62 // running upgrade logic, and other controllers are catching up. 63 UpgradeFinishing UpgradeStatus = "finishing" 64 65 // UpgradeComplete indicates that all controllers have finished running 66 // upgrade logic. 67 UpgradeComplete UpgradeStatus = "complete" 68 69 // UpgradeAborted indicates that the upgrade wasn't completed due 70 // to some problem. 71 UpgradeAborted UpgradeStatus = "aborted" 72 73 // currentUpgradeId is the mongo _id of the current upgrade info document. 74 currentUpgradeId = "current" 75 ) 76 77 type upgradeInfoDoc struct { 78 Id string `bson:"_id"` 79 PreviousVersion version.Number `bson:"previousVersion"` 80 TargetVersion version.Number `bson:"targetVersion"` 81 Status UpgradeStatus `bson:"status"` 82 Started time.Time `bson:"started"` 83 ControllersReady []string `bson:"controllersReady"` 84 ControllersDone []string `bson:"controllersDone"` 85 } 86 87 // UpgradeInfo is used to synchronise controller upgrades. 88 type UpgradeInfo struct { 89 st *State 90 doc upgradeInfoDoc 91 } 92 93 // PreviousVersion returns the version being upgraded from. 94 func (info *UpgradeInfo) PreviousVersion() version.Number { 95 return info.doc.PreviousVersion 96 } 97 98 // TargetVersion returns the version being upgraded to. 99 func (info *UpgradeInfo) TargetVersion() version.Number { 100 return info.doc.TargetVersion 101 } 102 103 // Status returns the status of the upgrade. 104 func (info *UpgradeInfo) Status() UpgradeStatus { 105 return info.doc.Status 106 } 107 108 // Started returns the time at which the upgrade was started. 109 func (info *UpgradeInfo) Started() time.Time { 110 return info.doc.Started 111 } 112 113 // ControllersReady returns the machine ids for controllers that 114 // have signalled that they are ready for upgrade. 115 func (info *UpgradeInfo) ControllersReady() []string { 116 result := make([]string, len(info.doc.ControllersReady)) 117 copy(result, info.doc.ControllersReady) 118 return result 119 } 120 121 // ControllersDone returns the machine ids for controllers that 122 // have completed their upgrades. 123 func (info *UpgradeInfo) ControllersDone() []string { 124 result := make([]string, len(info.doc.ControllersDone)) 125 copy(result, info.doc.ControllersDone) 126 return result 127 } 128 129 // Refresh updates the contents of the UpgradeInfo from underlying state. 130 func (info *UpgradeInfo) Refresh() error { 131 doc, err := currentUpgradeInfoDoc(info.st) 132 if err != nil { 133 return errors.Trace(err) 134 } 135 info.doc = *doc 136 return nil 137 } 138 139 // Watch returns a watcher for the state underlying the current 140 // UpgradeInfo instance. This is provided purely for convenience. 141 func (info *UpgradeInfo) Watch() NotifyWatcher { 142 return info.st.WatchUpgradeInfo() 143 } 144 145 // AllProvisionedControllersReady returns true if and only if all controllers 146 // that have been started by the provisioner have called EnsureUpgradeInfo with 147 // matching versions. 148 // 149 // When this returns true the master state controller can begin it's 150 // own upgrade. 151 func (info *UpgradeInfo) AllProvisionedControllersReady() (bool, error) { 152 provisioned, err := info.getProvisionedControllers() 153 if err != nil { 154 return false, errors.Trace(err) 155 } 156 ready := set.NewStrings(info.doc.ControllersReady...) 157 missing := set.NewStrings(provisioned...).Difference(ready) 158 return missing.IsEmpty(), nil 159 } 160 161 func (info *UpgradeInfo) getProvisionedControllers() ([]string, error) { 162 var provisioned []string 163 164 controllerInfo, err := info.st.ControllerInfo() 165 if err != nil { 166 return provisioned, errors.Annotate(err, "cannot read controllers") 167 } 168 169 upgradeDone, err := info.isModelUUIDUpgradeDone() 170 if err != nil { 171 return provisioned, errors.Trace(err) 172 } 173 174 // Extract current and provisioned controllers. 175 instanceData, closer := info.st.getRawCollection(instanceDataC) 176 defer closer() 177 178 // If instanceData has the env UUID upgrade query using the 179 // machineid field, otherwise check using _id. 180 var sel bson.D 181 var field string 182 if upgradeDone { 183 sel = bson.D{{"model-uuid", info.st.ModelUUID()}} 184 field = "machineid" 185 } else { 186 field = "_id" 187 } 188 sel = append(sel, bson.DocElem{field, bson.D{{"$in", controllerInfo.MachineIds}}}) 189 iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter() 190 191 var doc bson.M 192 for iter.Next(&doc) { 193 provisioned = append(provisioned, doc[field].(string)) 194 } 195 if err := iter.Close(); err != nil { 196 return provisioned, errors.Annotate(err, "cannot read provisioned machines") 197 } 198 return provisioned, nil 199 } 200 201 func (info *UpgradeInfo) isModelUUIDUpgradeDone() (bool, error) { 202 instanceData, closer := info.st.getRawCollection(instanceDataC) 203 defer closer() 204 205 query := instanceData.Find(bson.D{{"model-uuid", bson.D{{"$exists", true}}}}) 206 n, err := query.Count() 207 if err != nil { 208 return false, errors.Annotatef(err, "couldn't query instance upgrade status") 209 } 210 return n > 0, nil 211 } 212 213 // SetStatus sets the status of the current upgrade. Checks are made 214 // to ensure that status changes are performed in the correct order. 215 func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error { 216 var assertSane bson.D 217 switch status { 218 case UpgradePending, UpgradeComplete, UpgradeAborted: 219 return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status) 220 case UpgradeRunning: 221 assertSane = bson.D{{"status", bson.D{{"$in", 222 []UpgradeStatus{UpgradePending, UpgradeRunning}, 223 }}}} 224 case UpgradeFinishing: 225 assertSane = bson.D{{"status", bson.D{{"$in", 226 []UpgradeStatus{UpgradeRunning, UpgradeFinishing}, 227 }}}} 228 default: 229 return errors.Errorf("unknown upgrade status: %s", status) 230 } 231 if info.doc.Id != currentUpgradeId { 232 return errors.New("cannot set status on non-current upgrade") 233 } 234 235 ops := []txn.Op{{ 236 C: upgradeInfoC, 237 Id: currentUpgradeId, 238 Assert: append(bson.D{{ 239 "previousVersion", info.doc.PreviousVersion, 240 }, { 241 "targetVersion", info.doc.TargetVersion, 242 }}, assertSane...), 243 Update: bson.D{{"$set", bson.D{{"status", status}}}}, 244 }} 245 err := info.st.runTransaction(ops) 246 if err == txn.ErrAborted { 247 return errors.Errorf("cannot set upgrade status to %q: Another "+ 248 "status change may have occurred concurrently", status) 249 } 250 return errors.Annotate(err, "cannot set upgrade status") 251 } 252 253 // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the 254 // supplied versions. If a matching upgrade is in progress, that upgrade is returned; 255 // if there's a mismatch, an error is returned. The supplied machine id must correspond 256 // to a current controller. 257 func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 258 259 assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion) 260 if err != nil { 261 return nil, errors.Trace(err) 262 } 263 264 doc := upgradeInfoDoc{ 265 Id: currentUpgradeId, 266 PreviousVersion: previousVersion, 267 TargetVersion: targetVersion, 268 Status: UpgradePending, 269 Started: st.clock.Now().UTC(), 270 ControllersReady: []string{machineId}, 271 } 272 273 machine, err := st.Machine(machineId) 274 if err != nil { 275 return nil, errors.Trace(err) 276 } 277 278 ops := []txn.Op{{ 279 C: upgradeInfoC, 280 Id: currentUpgradeId, 281 Assert: txn.DocMissing, 282 Insert: doc, 283 }, { 284 C: instanceDataC, 285 Id: machine.doc.DocID, 286 Assert: txn.DocExists, 287 }} 288 if err := st.runRawTransaction(ops); err == nil { 289 return &UpgradeInfo{st: st, doc: doc}, nil 290 } else if err != txn.ErrAborted { 291 return nil, errors.Annotate(err, "cannot create upgrade info") 292 } 293 294 if provisioned, err := st.isMachineProvisioned(machineId); err != nil { 295 return nil, errors.Trace(err) 296 } else if !provisioned { 297 return nil, errors.Errorf( 298 "machine %s is not provisioned and should not be participating in upgrades", 299 machineId) 300 } 301 302 if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil { 303 return info, nil 304 } else if errors.Cause(err) != errUpgradeInfoNotUpdated { 305 return nil, errors.Trace(err) 306 } 307 308 ops = []txn.Op{{ 309 C: upgradeInfoC, 310 Id: currentUpgradeId, 311 Assert: assertSanity, 312 Update: bson.D{{ 313 "$addToSet", bson.D{{"controllersReady", machineId}}, 314 }}, 315 }} 316 switch err := st.runTransaction(ops); err { 317 case nil: 318 return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion) 319 case txn.ErrAborted: 320 return nil, errors.New("upgrade info changed during update") 321 } 322 return nil, errors.Annotate(err, "cannot update upgrade info") 323 } 324 325 func (st *State) isMachineProvisioned(machineId string) (bool, error) { 326 instanceData, closer := st.getRawCollection(instanceDataC) 327 defer closer() 328 329 for _, id := range []string{st.docID(machineId), machineId} { 330 count, err := instanceData.FindId(id).Count() 331 if err != nil { 332 return false, errors.Annotate(err, "cannot read instance data") 333 } 334 if count > 0 { 335 return true, nil 336 } 337 } 338 return false, nil 339 } 340 341 var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated") 342 343 func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 344 var doc upgradeInfoDoc 345 if pdoc, err := currentUpgradeInfoDoc(st); err != nil { 346 return nil, errors.Trace(err) 347 } else { 348 doc = *pdoc 349 } 350 351 if doc.PreviousVersion != previousVersion { 352 return nil, errors.Errorf( 353 "current upgrade info mismatch: expected previous version %s, got %s", 354 previousVersion, doc.PreviousVersion) 355 } 356 if doc.TargetVersion != targetVersion { 357 return nil, errors.Errorf( 358 "current upgrade info mismatch: expected target version %s, got %s", 359 targetVersion, doc.TargetVersion) 360 } 361 362 controllersReady := set.NewStrings(doc.ControllersReady...) 363 if !controllersReady.Contains(machineId) { 364 return nil, errors.Trace(errUpgradeInfoNotUpdated) 365 } 366 return &UpgradeInfo{st: st, doc: doc}, nil 367 } 368 369 // SetControllerDone marks the supplied state machineId as having 370 // completed its upgrades. When SetControllerDone is called by the 371 // last provisioned controller, the current upgrade info document 372 // will be archived with a status of UpgradeComplete. 373 func (info *UpgradeInfo) SetControllerDone(machineId string) error { 374 assertSanity, err := checkUpgradeInfoSanity(info.st, machineId, 375 info.doc.PreviousVersion, info.doc.TargetVersion) 376 if err != nil { 377 return errors.Trace(err) 378 } 379 380 buildTxn := func(attempt int) ([]txn.Op, error) { 381 doc, err := currentUpgradeInfoDoc(info.st) 382 if errors.IsNotFound(err) { 383 return nil, jujutxn.ErrNoOperations 384 } else if err != nil { 385 return nil, errors.Trace(err) 386 } 387 switch doc.Status { 388 case UpgradePending, UpgradeRunning: 389 return nil, errors.New("upgrade has not yet run") 390 } 391 392 controllersDone := set.NewStrings(doc.ControllersDone...) 393 if controllersDone.Contains(machineId) { 394 return nil, jujutxn.ErrNoOperations 395 } 396 controllersDone.Add(machineId) 397 398 controllersReady := set.NewStrings(doc.ControllersReady...) 399 controllersNotDone := controllersReady.Difference(controllersDone) 400 if controllersNotDone.IsEmpty() { 401 // This is the last controller. Archive the current 402 // upgradeInfo document. 403 doc.ControllersDone = controllersDone.SortedValues() 404 return info.makeArchiveOps(doc, UpgradeComplete), nil 405 } 406 407 return []txn.Op{{ 408 C: upgradeInfoC, 409 Id: currentUpgradeId, 410 // This is not the last controller, but we need to be 411 // sure it still isn't when we run this. 412 Assert: append(assertSanity, bson.D{{ 413 "controllersDone", bson.D{{"$nin", controllersNotDone.Values()}}, 414 }}...), 415 Update: bson.D{{"$addToSet", bson.D{{"controllersDone", machineId}}}}, 416 }}, nil 417 } 418 err = info.st.run(buildTxn) 419 return errors.Annotate(err, "cannot complete upgrade") 420 } 421 422 // Abort marks the current upgrade as aborted. It should be called if 423 // the upgrade can't be completed for some reason. 424 func (info *UpgradeInfo) Abort() error { 425 buildTxn := func(attempt int) ([]txn.Op, error) { 426 doc, err := currentUpgradeInfoDoc(info.st) 427 if errors.IsNotFound(err) { 428 return nil, jujutxn.ErrNoOperations 429 } else if err != nil { 430 return nil, errors.Trace(err) 431 } 432 return info.makeArchiveOps(doc, UpgradeAborted), nil 433 } 434 err := info.st.run(buildTxn) 435 return errors.Annotate(err, "cannot abort upgrade") 436 } 437 438 func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op { 439 doc.Status = status 440 doc.Id = bson.NewObjectId().String() // change id to archive value 441 return []txn.Op{{ 442 C: upgradeInfoC, 443 Id: currentUpgradeId, 444 Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion), 445 Remove: true, 446 }, { 447 C: upgradeInfoC, 448 Id: doc.Id, 449 Assert: txn.DocMissing, 450 Insert: doc, 451 }} 452 } 453 454 // IsUpgrading returns true if an upgrade is currently in progress. 455 func (st *State) IsUpgrading() (bool, error) { 456 doc, err := currentUpgradeInfoDoc(st) 457 if doc != nil && err == nil { 458 return true, nil 459 } else if errors.IsNotFound(err) { 460 return false, nil 461 } else { 462 return false, errors.Trace(err) 463 } 464 } 465 466 // AbortCurrentUpgrade archives any current UpgradeInfo and sets its 467 // status to UpgradeAborted. Nothing happens if there's no current 468 // UpgradeInfo. 469 func (st *State) AbortCurrentUpgrade() error { 470 doc, err := currentUpgradeInfoDoc(st) 471 if err != nil { 472 if errors.IsNotFound(err) { 473 return nil 474 } 475 return errors.Trace(err) 476 } 477 info := &UpgradeInfo{st: st, doc: *doc} 478 return errors.Trace(info.Abort()) 479 480 } 481 482 func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) { 483 var doc upgradeInfoDoc 484 upgradeInfo, closer := st.getCollection(upgradeInfoC) 485 defer closer() 486 if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound { 487 return nil, errors.NotFoundf("current upgrade info") 488 } else if err != nil { 489 return nil, errors.Annotate(err, "cannot read upgrade info") 490 } 491 return &doc, nil 492 } 493 494 func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) { 495 if previousVersion.Compare(targetVersion) != -1 { 496 return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion) 497 } 498 controllerInfo, err := st.ControllerInfo() 499 if err != nil { 500 return nil, errors.Annotate(err, "cannot read controllers") 501 } 502 validIds := set.NewStrings(controllerInfo.MachineIds...) 503 if !validIds.Contains(machineId) { 504 return nil, errors.Errorf("machine %q is not a controller", machineId) 505 } 506 return assertExpectedVersions(previousVersion, targetVersion), nil 507 } 508 509 func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D { 510 return bson.D{{ 511 "previousVersion", previousVersion, 512 }, { 513 "targetVersion", targetVersion, 514 }} 515 } 516 517 // ClearUpgradeInfo clears information about an upgrade in progress. It returns 518 // an error if no upgrade is current. 519 func (st *State) ClearUpgradeInfo() error { 520 ops := []txn.Op{{ 521 C: upgradeInfoC, 522 Id: currentUpgradeId, 523 Assert: txn.DocExists, 524 Remove: true, 525 }} 526 err := st.runTransaction(ops) 527 return errors.Annotate(err, "cannot clear upgrade info") 528 }