github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/state/upgrade.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 /* 5 This file defines infrastructure for synchronising state server tools 6 upgrades. Synchronisation is handled via a mongo DB document in the 7 "upgradeInfo" collection. 8 9 The functionality here is intended to be used as follows: 10 11 1. When state servers come up running the new tools version, they call 12 EnsureUpgradeInfo before running upgrade steps. 13 14 2a. Any secondary state server watches the UpgradeInfo document and 15 waits for the status to change to UpgradeFinishing. 16 17 2b. The master state server watches the UpgradeInfo document and waits 18 for AllProvisionedStateServersReady to return true. This indicates 19 that all provisioned state servers have called EnsureUpgradeInfo and 20 are ready to upgrade. 21 22 3. The master state server calls SetStatus with UpgradeRunning and 23 runs its upgrade steps. 24 25 4. The master state server calls SetStatus with UpgradeFinishing and 26 then calls SetStateServerDone with it's own machine id. 27 28 5. Secondary state servers, seeing that the status has changed to 29 UpgradeFinishing, run their upgrade steps and then call 30 SetStateServerDone when complete. 31 32 6. Once the final state server calls SetStateServerDone, the status is 33 changed to UpgradeComplete and the upgradeInfo document is archived. 34 */ 35 36 package state 37 38 import ( 39 "time" 40 41 "github.com/juju/errors" 42 jujutxn "github.com/juju/txn" 43 "github.com/juju/utils/set" 44 "gopkg.in/mgo.v2" 45 "gopkg.in/mgo.v2/bson" 46 "gopkg.in/mgo.v2/txn" 47 48 "github.com/juju/juju/version" 49 ) 50 51 // UpgradeStatus describes the states an upgrade operation may be in. 52 type UpgradeStatus string 53 54 const ( 55 // UpgradePending indicates that an upgrade is queued but not yet started. 56 UpgradePending UpgradeStatus = "pending" 57 58 // UpgradeRunning indicates that the master state server has started 59 // running upgrade logic, and other state servers are waiting for it. 60 UpgradeRunning UpgradeStatus = "running" 61 62 // UpgradeFinishing indicates that the master state server has finished 63 // running upgrade logic, and other state servers are catching up. 64 UpgradeFinishing UpgradeStatus = "finishing" 65 66 // UpgradeComplete indicates that all state servers have finished running 67 // upgrade logic. 68 UpgradeComplete UpgradeStatus = "complete" 69 70 // UpgradeAborted indicates that the upgrade wasn't completed due 71 // to some problem. 72 UpgradeAborted UpgradeStatus = "aborted" 73 74 // currentUpgradeId is the mongo _id of the current upgrade info document. 75 currentUpgradeId = "current" 76 ) 77 78 type upgradeInfoDoc struct { 79 Id string `bson:"_id"` 80 PreviousVersion version.Number `bson:"previousVersion"` 81 TargetVersion version.Number `bson:"targetVersion"` 82 Status UpgradeStatus `bson:"status"` 83 Started time.Time `bson:"started"` 84 StateServersReady []string `bson:"stateServersReady"` 85 StateServersDone []string `bson:"stateServersDone"` 86 } 87 88 // UpgradeInfo is used to synchronise state server upgrades. 89 type UpgradeInfo struct { 90 st *State 91 doc upgradeInfoDoc 92 } 93 94 // PreviousVersion returns the version being upgraded from. 95 func (info *UpgradeInfo) PreviousVersion() version.Number { 96 return info.doc.PreviousVersion 97 } 98 99 // TargetVersion returns the version being upgraded to. 100 func (info *UpgradeInfo) TargetVersion() version.Number { 101 return info.doc.TargetVersion 102 } 103 104 // Status returns the status of the upgrade. 105 func (info *UpgradeInfo) Status() UpgradeStatus { 106 return info.doc.Status 107 } 108 109 // Started returns the time at which the upgrade was started. 110 func (info *UpgradeInfo) Started() time.Time { 111 return info.doc.Started 112 } 113 114 // StateServersReady returns the machine ids for state servers that 115 // have signalled that they are ready for upgrade. 116 func (info *UpgradeInfo) StateServersReady() []string { 117 result := make([]string, len(info.doc.StateServersReady)) 118 copy(result, info.doc.StateServersReady) 119 return result 120 } 121 122 // StateServersDone returns the machine ids for state servers that 123 // have completed their upgrades. 124 func (info *UpgradeInfo) StateServersDone() []string { 125 result := make([]string, len(info.doc.StateServersDone)) 126 copy(result, info.doc.StateServersDone) 127 return result 128 } 129 130 // Refresh updates the contents of the UpgradeInfo from underlying state. 131 func (info *UpgradeInfo) Refresh() error { 132 doc, err := currentUpgradeInfoDoc(info.st) 133 if err != nil { 134 return errors.Trace(err) 135 } 136 info.doc = *doc 137 return nil 138 } 139 140 // Watcher returns a watcher for the state underlying the current 141 // UpgradeInfo instance. This is provided purely for convenience. 142 func (info *UpgradeInfo) Watch() NotifyWatcher { 143 return info.st.WatchUpgradeInfo() 144 } 145 146 // AllProvisionedStateServersReady returns true if and only if all state servers 147 // that have been started by the provisioner have called EnsureUpgradeInfo with 148 // matching versions. 149 // 150 // When this returns true the master state state server can begin it's 151 // own upgrade. 152 func (info *UpgradeInfo) AllProvisionedStateServersReady() (bool, error) { 153 provisioned, err := info.getProvisionedStateServers() 154 if err != nil { 155 return false, errors.Trace(err) 156 } 157 ready := set.NewStrings(info.doc.StateServersReady...) 158 missing := set.NewStrings(provisioned...).Difference(ready) 159 return missing.IsEmpty(), nil 160 } 161 162 func (info *UpgradeInfo) getProvisionedStateServers() ([]string, error) { 163 var provisioned []string 164 165 stateServerInfo, err := info.st.StateServerInfo() 166 if err != nil { 167 return provisioned, errors.Annotate(err, "cannot read state servers") 168 } 169 170 upgradeDone, err := info.isEnvUUIDUpgradeDone() 171 if err != nil { 172 return provisioned, errors.Trace(err) 173 } 174 175 // Extract current and provisioned state servers. 176 instanceData, closer := info.st.getRawCollection(instanceDataC) 177 defer closer() 178 179 // If instanceData has the env UUID upgrade query using the 180 // machineid field, otherwise check using _id. 181 var sel bson.D 182 var field string 183 if upgradeDone { 184 sel = bson.D{{"env-uuid", info.st.EnvironUUID()}} 185 field = "machineid" 186 } else { 187 field = "_id" 188 } 189 sel = append(sel, bson.DocElem{field, bson.D{{"$in", stateServerInfo.MachineIds}}}) 190 iter := instanceData.Find(sel).Select(bson.D{{field, true}}).Iter() 191 192 var doc bson.M 193 for iter.Next(&doc) { 194 provisioned = append(provisioned, doc[field].(string)) 195 } 196 if err := iter.Close(); err != nil { 197 return provisioned, errors.Annotate(err, "cannot read provisioned machines") 198 } 199 return provisioned, nil 200 } 201 202 func (info *UpgradeInfo) isEnvUUIDUpgradeDone() (bool, error) { 203 instanceData, closer := info.st.getRawCollection(instanceDataC) 204 defer closer() 205 206 query := instanceData.Find(bson.D{{"env-uuid", bson.D{{"$exists", true}}}}) 207 n, err := query.Count() 208 if err != nil { 209 return false, errors.Annotatef(err, "couldn't query instance upgrade status") 210 } 211 return n > 0, nil 212 } 213 214 // SetStatus sets the status of the current upgrade. Checks are made 215 // to ensure that status changes are performed in the correct order. 216 func (info *UpgradeInfo) SetStatus(status UpgradeStatus) error { 217 var assertSane bson.D 218 switch status { 219 case UpgradePending, UpgradeComplete, UpgradeAborted: 220 return errors.Errorf("cannot explicitly set upgrade status to \"%s\"", status) 221 case UpgradeRunning: 222 assertSane = bson.D{{"status", bson.D{{"$in", 223 []UpgradeStatus{UpgradePending, UpgradeRunning}, 224 }}}} 225 case UpgradeFinishing: 226 assertSane = bson.D{{"status", bson.D{{"$in", 227 []UpgradeStatus{UpgradeRunning, UpgradeFinishing}, 228 }}}} 229 default: 230 return errors.Errorf("unknown upgrade status: %s", status) 231 } 232 if info.doc.Id != currentUpgradeId { 233 return errors.New("cannot set status on non-current upgrade") 234 } 235 236 ops := []txn.Op{{ 237 C: upgradeInfoC, 238 Id: currentUpgradeId, 239 Assert: append(bson.D{{ 240 "previousVersion", info.doc.PreviousVersion, 241 }, { 242 "targetVersion", info.doc.TargetVersion, 243 }}, assertSane...), 244 Update: bson.D{{"$set", bson.D{{"status", status}}}}, 245 }} 246 err := info.st.runTransaction(ops) 247 if err == txn.ErrAborted { 248 return errors.Errorf("cannot set upgrade status to %q: Another "+ 249 "status change may have occurred concurrently", status) 250 } 251 return errors.Annotate(err, "cannot set upgrade status") 252 } 253 254 // EnsureUpgradeInfo returns an UpgradeInfo describing a current upgrade between the 255 // supplied versions. If a matching upgrade is in progress, that upgrade is returned; 256 // if there's a mismatch, an error is returned. The supplied machine id must correspond 257 // to a current state server. 258 func (st *State) EnsureUpgradeInfo(machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 259 260 assertSanity, err := checkUpgradeInfoSanity(st, machineId, previousVersion, targetVersion) 261 if err != nil { 262 return nil, errors.Trace(err) 263 } 264 265 doc := upgradeInfoDoc{ 266 Id: currentUpgradeId, 267 PreviousVersion: previousVersion, 268 TargetVersion: targetVersion, 269 Status: UpgradePending, 270 Started: time.Now().UTC(), 271 StateServersReady: []string{machineId}, 272 } 273 274 machine, err := st.Machine(machineId) 275 if err != nil { 276 return nil, errors.Trace(err) 277 } 278 279 ops := []txn.Op{{ 280 C: upgradeInfoC, 281 Id: currentUpgradeId, 282 Assert: txn.DocMissing, 283 Insert: doc, 284 }, { 285 C: instanceDataC, 286 Id: machine.doc.DocID, 287 Assert: txn.DocExists, 288 }} 289 if err := st.runRawTransaction(ops); err == nil { 290 return &UpgradeInfo{st: st, doc: doc}, nil 291 } else if err != txn.ErrAborted { 292 return nil, errors.Annotate(err, "cannot create upgrade info") 293 } 294 295 if provisioned, err := st.isMachineProvisioned(machineId); err != nil { 296 return nil, errors.Trace(err) 297 } else if !provisioned { 298 return nil, errors.Errorf( 299 "machine %s is not provisioned and should not be participating in upgrades", 300 machineId) 301 } 302 303 if info, err := ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion); err == nil { 304 return info, nil 305 } else if errors.Cause(err) != errUpgradeInfoNotUpdated { 306 return nil, errors.Trace(err) 307 } 308 309 ops = []txn.Op{{ 310 C: upgradeInfoC, 311 Id: currentUpgradeId, 312 Assert: assertSanity, 313 Update: bson.D{{ 314 "$addToSet", bson.D{{"stateServersReady", machineId}}, 315 }}, 316 }} 317 switch err := st.runTransaction(ops); err { 318 case nil: 319 return ensureUpgradeInfoUpdated(st, machineId, previousVersion, targetVersion) 320 case txn.ErrAborted: 321 return nil, errors.New("upgrade info changed during update") 322 } 323 return nil, errors.Annotate(err, "cannot update upgrade info") 324 } 325 326 func (st *State) isMachineProvisioned(machineId string) (bool, error) { 327 instanceData, closer := st.getRawCollection(instanceDataC) 328 defer closer() 329 330 for _, id := range []string{st.docID(machineId), machineId} { 331 count, err := instanceData.FindId(id).Count() 332 if err != nil { 333 return false, errors.Annotate(err, "cannot read instance data") 334 } 335 if count > 0 { 336 return true, nil 337 } 338 } 339 return false, nil 340 } 341 342 var errUpgradeInfoNotUpdated = errors.New("upgrade info not updated") 343 344 func ensureUpgradeInfoUpdated(st *State, machineId string, previousVersion, targetVersion version.Number) (*UpgradeInfo, error) { 345 var doc upgradeInfoDoc 346 if pdoc, err := currentUpgradeInfoDoc(st); err != nil { 347 return nil, errors.Trace(err) 348 } else { 349 doc = *pdoc 350 } 351 352 if doc.PreviousVersion != previousVersion { 353 return nil, errors.Errorf( 354 "current upgrade info mismatch: expected previous version %s, got %s", 355 previousVersion, doc.PreviousVersion) 356 } 357 if doc.TargetVersion != targetVersion { 358 return nil, errors.Errorf( 359 "current upgrade info mismatch: expected target version %s, got %s", 360 targetVersion, doc.TargetVersion) 361 } 362 363 stateServersReady := set.NewStrings(doc.StateServersReady...) 364 if !stateServersReady.Contains(machineId) { 365 return nil, errors.Trace(errUpgradeInfoNotUpdated) 366 } 367 return &UpgradeInfo{st: st, doc: doc}, nil 368 } 369 370 // SetStateServerDone marks the supplied state machineId as having 371 // completed its upgrades. When SetStateServerDone is called by the 372 // last provisioned state server, the current upgrade info document 373 // will be archived with a status of UpgradeComplete. 374 func (info *UpgradeInfo) SetStateServerDone(machineId string) error { 375 assertSanity, err := checkUpgradeInfoSanity(info.st, machineId, 376 info.doc.PreviousVersion, info.doc.TargetVersion) 377 if err != nil { 378 return errors.Trace(err) 379 } 380 381 buildTxn := func(attempt int) ([]txn.Op, error) { 382 doc, err := currentUpgradeInfoDoc(info.st) 383 if errors.IsNotFound(err) { 384 return nil, jujutxn.ErrNoOperations 385 } else if err != nil { 386 return nil, errors.Trace(err) 387 } 388 switch doc.Status { 389 case UpgradePending, UpgradeRunning: 390 return nil, errors.New("upgrade has not yet run") 391 } 392 393 stateServersDone := set.NewStrings(doc.StateServersDone...) 394 if stateServersDone.Contains(machineId) { 395 return nil, jujutxn.ErrNoOperations 396 } 397 stateServersDone.Add(machineId) 398 399 stateServersReady := set.NewStrings(doc.StateServersReady...) 400 stateServersNotDone := stateServersReady.Difference(stateServersDone) 401 if stateServersNotDone.IsEmpty() { 402 // This is the last state server. Archive the current 403 // upgradeInfo document. 404 doc.StateServersDone = stateServersDone.SortedValues() 405 return info.makeArchiveOps(doc, UpgradeComplete), nil 406 } 407 408 return []txn.Op{{ 409 C: upgradeInfoC, 410 Id: currentUpgradeId, 411 // This is not the last state server, but we need to be 412 // sure it still isn't when we run this. 413 Assert: append(assertSanity, bson.D{{ 414 "stateServersDone", bson.D{{"$nin", stateServersNotDone.Values()}}, 415 }}...), 416 Update: bson.D{{"$addToSet", bson.D{{"stateServersDone", machineId}}}}, 417 }}, nil 418 } 419 err = info.st.run(buildTxn) 420 return errors.Annotate(err, "cannot complete upgrade") 421 } 422 423 // Abort marks the current upgrade as aborted. It should be called if 424 // the upgrade can't be completed for some reason. 425 func (info *UpgradeInfo) Abort() error { 426 buildTxn := func(attempt int) ([]txn.Op, error) { 427 doc, err := currentUpgradeInfoDoc(info.st) 428 if errors.IsNotFound(err) { 429 return nil, jujutxn.ErrNoOperations 430 } else if err != nil { 431 return nil, errors.Trace(err) 432 } 433 return info.makeArchiveOps(doc, UpgradeAborted), nil 434 } 435 err := info.st.run(buildTxn) 436 return errors.Annotate(err, "cannot abort upgrade") 437 } 438 439 func (info *UpgradeInfo) makeArchiveOps(doc *upgradeInfoDoc, status UpgradeStatus) []txn.Op { 440 doc.Status = status 441 doc.Id = bson.NewObjectId().String() // change id to archive value 442 return []txn.Op{{ 443 C: upgradeInfoC, 444 Id: currentUpgradeId, 445 Assert: assertExpectedVersions(doc.PreviousVersion, doc.TargetVersion), 446 Remove: true, 447 }, { 448 C: upgradeInfoC, 449 Id: doc.Id, 450 Assert: txn.DocMissing, 451 Insert: doc, 452 }} 453 } 454 455 // IsUpgrading returns true if an upgrade is currently in progress. 456 func (st *State) IsUpgrading() (bool, error) { 457 doc, err := currentUpgradeInfoDoc(st) 458 if doc != nil && err == nil { 459 return true, nil 460 } else if errors.IsNotFound(err) { 461 return false, nil 462 } else { 463 return false, errors.Trace(err) 464 } 465 } 466 467 // AbortCurrentUpgrade archives any current UpgradeInfo and sets its 468 // status to UpgradeAborted. Nothing happens if there's no current 469 // UpgradeInfo. 470 func (st *State) AbortCurrentUpgrade() error { 471 doc, err := currentUpgradeInfoDoc(st) 472 if err != nil { 473 if errors.IsNotFound(err) { 474 return nil 475 } 476 return errors.Trace(err) 477 } 478 info := &UpgradeInfo{st: st, doc: *doc} 479 return errors.Trace(info.Abort()) 480 481 } 482 483 func currentUpgradeInfoDoc(st *State) (*upgradeInfoDoc, error) { 484 var doc upgradeInfoDoc 485 upgradeInfo, closer := st.getCollection(upgradeInfoC) 486 defer closer() 487 if err := upgradeInfo.FindId(currentUpgradeId).One(&doc); err == mgo.ErrNotFound { 488 return nil, errors.NotFoundf("current upgrade info") 489 } else if err != nil { 490 return nil, errors.Annotate(err, "cannot read upgrade info") 491 } 492 return &doc, nil 493 } 494 495 func checkUpgradeInfoSanity(st *State, machineId string, previousVersion, targetVersion version.Number) (bson.D, error) { 496 if previousVersion.Compare(targetVersion) != -1 { 497 return nil, errors.Errorf("cannot sanely upgrade from %s to %s", previousVersion, targetVersion) 498 } 499 stateServerInfo, err := st.StateServerInfo() 500 if err != nil { 501 return nil, errors.Annotate(err, "cannot read state servers") 502 } 503 validIds := set.NewStrings(stateServerInfo.MachineIds...) 504 if !validIds.Contains(machineId) { 505 return nil, errors.Errorf("machine %q is not a state server", machineId) 506 } 507 return assertExpectedVersions(previousVersion, targetVersion), nil 508 } 509 510 func assertExpectedVersions(previousVersion, targetVersion version.Number) bson.D { 511 return bson.D{{ 512 "previousVersion", previousVersion, 513 }, { 514 "targetVersion", targetVersion, 515 }} 516 } 517 518 // ClearUpgradeInfo clears information about an upgrade in progress. It returns 519 // an error if no upgrade is current. 520 func (st *State) ClearUpgradeInfo() error { 521 ops := []txn.Op{{ 522 C: upgradeInfoC, 523 Id: currentUpgradeId, 524 Assert: txn.DocExists, 525 Remove: true, 526 }} 527 err := st.runTransaction(ops) 528 return errors.Annotate(err, "cannot clear upgrade info") 529 }