github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/migrationmaster/worker.go (about) 1 // Copyright 2016 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package migrationmaster 5 6 import ( 7 "context" 8 "fmt" 9 "io" 10 "strings" 11 "time" 12 13 "github.com/juju/clock" 14 "github.com/juju/errors" 15 "github.com/juju/loggo" 16 "github.com/juju/names/v5" 17 "github.com/juju/version/v2" 18 "github.com/juju/worker/v3/catacomb" 19 "github.com/kr/pretty" 20 21 "github.com/juju/juju/api" 22 "github.com/juju/juju/api/common" 23 "github.com/juju/juju/api/controller/migrationtarget" 24 corelogger "github.com/juju/juju/core/logger" 25 coremigration "github.com/juju/juju/core/migration" 26 "github.com/juju/juju/core/resources" 27 "github.com/juju/juju/core/watcher" 28 "github.com/juju/juju/migration" 29 "github.com/juju/juju/rpc/params" 30 "github.com/juju/juju/tools" 31 "github.com/juju/juju/worker/fortress" 32 "github.com/juju/juju/wrench" 33 ) 34 35 var ( 36 // ErrInactive is returned when the migration is no longer active 37 // (probably aborted). In this case the migrationmaster should be 38 // restarted so that it can wait for the next migration attempt. 39 ErrInactive = errors.New("migration is no longer active") 40 41 // ErrMigrated is returned when the model has migrated to another 42 // server. The migrationmaster should not be restarted again in 43 // this case. 44 ErrMigrated = errors.New("model has migrated") 45 46 // utcZero matches the deserialised zero times coming back from 47 // MigrationTarget.LatestLogTime, because they have a non-nil 48 // location. 49 utcZero = time.Time{}.In(time.UTC) 50 ) 51 52 // progressUpdateInterval is the time between progress update 53 // messages. It's used while the migrationmaster is waiting for 54 // reports from minions and while it's transferring log messages 55 // to the newly-migrated model. 56 const progressUpdateInterval = 30 * time.Second 57 58 // Facade exposes controller functionality to a Worker. 59 type Facade interface { 60 // Watch returns a watcher which reports when a migration is 61 // active for the model associated with the API connection. 62 Watch() (watcher.NotifyWatcher, error) 63 64 // MigrationStatus returns the details and progress of the latest 65 // model migration. 66 MigrationStatus() (coremigration.MigrationStatus, error) 67 68 // SetPhase updates the phase of the currently active model 69 // migration. 70 SetPhase(coremigration.Phase) error 71 72 // SetStatusMessage sets a human readable message regarding the 73 // progress of a migration. 74 SetStatusMessage(string) error 75 76 // Prechecks performs pre-migration checks on the model and 77 // (source) controller. 78 Prechecks() error 79 80 // ModelInfo return basic information about the model to migrated. 81 ModelInfo() (coremigration.ModelInfo, error) 82 83 // SourceControllerInfo returns connection information about the source controller 84 // and uuids of any other hosted models involved in cross model relations. 85 SourceControllerInfo() (coremigration.SourceControllerInfo, []string, error) 86 87 // Export returns a serialized representation of the model 88 // associated with the API connection. 89 Export() (coremigration.SerializedModel, error) 90 91 // ProcessRelations runs a series of processes to ensure that the relations 92 // of a given model are correct after a migrated model. 93 ProcessRelations(string) error 94 95 // OpenResource downloads a single resource for an application. 96 OpenResource(string, string) (io.ReadCloser, error) 97 98 // Reap removes all documents of the model associated with the API 99 // connection. 100 Reap() error 101 102 // WatchMinionReports returns a watcher which reports when a migration 103 // minion has made a report for the current migration phase. 104 WatchMinionReports() (watcher.NotifyWatcher, error) 105 106 // MinionReports returns details of the reports made by migration 107 // minions to the controller for the current migration phase. 108 MinionReports() (coremigration.MinionReports, error) 109 110 // MinionReportTimeout returns the maximum time to wait for minion workers 111 // to report on a migration phase. 112 MinionReportTimeout() (time.Duration, error) 113 114 // StreamModelLog takes a starting time and returns a channel that 115 // will yield the logs on or after that time - these are the logs 116 // that need to be transferred to the target after the migration 117 // is successful. 118 StreamModelLog(context.Context, time.Time) (<-chan common.LogMessage, error) 119 } 120 121 // Config defines the operation of a Worker. 122 type Config struct { 123 ModelUUID string 124 Facade Facade 125 Guard fortress.Guard 126 APIOpen func(*api.Info, api.DialOpts) (api.Connection, error) 127 UploadBinaries func(migration.UploadBinariesConfig) error 128 CharmDownloader migration.CharmDownloader 129 ToolsDownloader migration.ToolsDownloader 130 Clock clock.Clock 131 } 132 133 // Validate returns an error if config cannot drive a Worker. 134 func (config Config) Validate() error { 135 if !names.IsValidModel(config.ModelUUID) { 136 return errors.NotValidf("model UUID %q", config.ModelUUID) 137 } 138 if config.Facade == nil { 139 return errors.NotValidf("nil Facade") 140 } 141 if config.Guard == nil { 142 return errors.NotValidf("nil Guard") 143 } 144 if config.APIOpen == nil { 145 return errors.NotValidf("nil APIOpen") 146 } 147 if config.UploadBinaries == nil { 148 return errors.NotValidf("nil UploadBinaries") 149 } 150 if config.CharmDownloader == nil { 151 return errors.NotValidf("nil CharmDownloader") 152 } 153 if config.ToolsDownloader == nil { 154 return errors.NotValidf("nil ToolsDownloader") 155 } 156 if config.Clock == nil { 157 return errors.NotValidf("nil Clock") 158 } 159 return nil 160 } 161 162 // New returns a Worker backed by config, or an error. 163 func New(config Config) (*Worker, error) { 164 if err := config.Validate(); err != nil { 165 return nil, errors.Trace(err) 166 } 167 168 // Soon we will get model specific logs generated in the 169 // controller logged against the model. Until then, distinguish 170 // the logs from different migrationmaster insteads using the short 171 // model UUID suffix. 172 loggerName := "juju.worker.migrationmaster." + names.NewModelTag(config.ModelUUID).ShortId() 173 logger := loggo.GetLoggerWithLabels(loggerName, corelogger.MIGRATION) 174 175 w := &Worker{ 176 config: config, 177 logger: logger, 178 } 179 err := catacomb.Invoke(catacomb.Plan{ 180 Site: &w.catacomb, 181 Work: w.run, 182 }) 183 if err != nil { 184 return nil, errors.Trace(err) 185 } 186 return w, nil 187 } 188 189 // Worker waits until a migration is active and its configured 190 // Fortress is locked down, and then orchestrates a model migration. 191 type Worker struct { 192 catacomb catacomb.Catacomb 193 config Config 194 logger loggo.Logger 195 lastFailure string 196 minionReportTimeout time.Duration 197 } 198 199 // Kill implements worker.Worker. 200 func (w *Worker) Kill() { 201 w.catacomb.Kill(nil) 202 } 203 204 // Wait implements worker.Worker. 205 func (w *Worker) Wait() error { 206 return w.catacomb.Wait() 207 } 208 209 func (w *Worker) run() error { 210 status, err := w.waitForActiveMigration() 211 if err != nil { 212 return errors.Trace(err) 213 } 214 215 err = w.config.Guard.Lockdown(w.catacomb.Dying()) 216 if errors.Cause(err) == fortress.ErrAborted { 217 return w.catacomb.ErrDying() 218 } else if err != nil { 219 return errors.Trace(err) 220 } 221 222 if w.minionReportTimeout, err = w.config.Facade.MinionReportTimeout(); err != nil { 223 return errors.Trace(err) 224 } 225 226 phase := status.Phase 227 228 for { 229 var err error 230 switch phase { 231 case coremigration.QUIESCE: 232 phase, err = w.doQUIESCE(status) 233 case coremigration.IMPORT: 234 phase, err = w.doIMPORT(status.TargetInfo, status.ModelUUID) 235 case coremigration.PROCESSRELATIONS: 236 phase, err = w.doPROCESSRELATIONS(status) 237 case coremigration.VALIDATION: 238 phase, err = w.doVALIDATION(status) 239 case coremigration.SUCCESS: 240 phase, err = w.doSUCCESS(status) 241 case coremigration.LOGTRANSFER: 242 phase, err = w.doLOGTRANSFER(status.TargetInfo, status.ModelUUID) 243 case coremigration.REAP: 244 phase, err = w.doREAP() 245 case coremigration.ABORT: 246 phase, err = w.doABORT(status.TargetInfo, status.ModelUUID) 247 default: 248 return errors.Errorf("unknown phase: %v [%d]", phase.String(), phase) 249 } 250 251 if err != nil { 252 // A phase handler should only return an error if the 253 // migration master should exit. In the face of other 254 // errors the handler should log the problem and then 255 // return the appropriate error phase to transition to - 256 // i.e. ABORT or REAPFAILED) 257 return errors.Trace(err) 258 } 259 260 if w.killed() { 261 return w.catacomb.ErrDying() 262 } 263 264 w.logger.Infof("setting migration phase to %s", phase) 265 if err := w.config.Facade.SetPhase(phase); err != nil { 266 return errors.Annotate(err, "failed to set phase") 267 } 268 status.Phase = phase 269 270 if modelHasMigrated(phase) { 271 return ErrMigrated 272 } else if phase.IsTerminal() { 273 // Some other terminal phase (aborted), exit and try 274 // again. 275 return ErrInactive 276 } 277 } 278 } 279 280 func (w *Worker) killed() bool { 281 select { 282 case <-w.catacomb.Dying(): 283 return true 284 default: 285 return false 286 } 287 } 288 289 func (w *Worker) setInfoStatus(s string, a ...interface{}) { 290 w.setStatusAndLog(w.logger.Infof, s, a...) 291 } 292 293 func (w *Worker) setErrorStatus(s string, a ...interface{}) { 294 w.lastFailure = fmt.Sprintf(s, a...) 295 w.setStatusAndLog(w.logger.Errorf, s, a...) 296 } 297 298 func (w *Worker) setStatusAndLog(log func(string, ...interface{}), s string, a ...interface{}) { 299 message := fmt.Sprintf(s, a...) 300 log(message) 301 if err := w.setStatus(message); err != nil { 302 // Setting status isn't critical. If it fails, just logging 303 // the problem here and not passing it upstream makes things a 304 // lot clearer in the caller. 305 w.logger.Errorf("%s", err) 306 } 307 } 308 309 func (w *Worker) setStatus(message string) error { 310 err := w.config.Facade.SetStatusMessage(message) 311 return errors.Annotate(err, "failed to set status message") 312 } 313 314 func (w *Worker) doQUIESCE(status coremigration.MigrationStatus) (coremigration.Phase, error) { 315 // Run prechecks before waiting for minions to report back. This 316 // short-circuits the long timeout in the case of an agent being 317 // down. 318 if err := w.prechecks(status); err != nil { 319 w.setErrorStatus(err.Error()) 320 return coremigration.ABORT, nil 321 } 322 323 ok, err := w.waitForMinions(status, failFast, "quiescing") 324 if err != nil { 325 return coremigration.UNKNOWN, errors.Trace(err) 326 } 327 if !ok { 328 return coremigration.ABORT, nil 329 } 330 331 // Now that the model is stable, run the prechecks again. 332 if err := w.prechecks(status); err != nil { 333 w.setErrorStatus(err.Error()) 334 return coremigration.ABORT, nil 335 } 336 337 return coremigration.IMPORT, nil 338 } 339 340 var incompatibleTargetMessage = ` 341 target controller must be upgraded to 2.9.43 or later 342 to be able to migrate models with cross model relations 343 to other models hosted on the source controller 344 `[1:] 345 346 func (w *Worker) prechecks(status coremigration.MigrationStatus) error { 347 model, err := w.config.Facade.ModelInfo() 348 if err != nil { 349 return errors.Annotate(err, "failed to obtain model info during prechecks") 350 } 351 352 w.setInfoStatus("performing source prechecks") 353 if err := w.config.Facade.Prechecks(); err != nil { 354 return errors.Annotate(err, "source prechecks failed") 355 } 356 357 w.setInfoStatus("performing target prechecks") 358 targetConn, err := w.openAPIConn(status.TargetInfo) 359 if err != nil { 360 return errors.Annotate(err, "failed to connect to target controller during prechecks") 361 } 362 defer targetConn.Close() 363 if targetConn.ControllerTag() != status.TargetInfo.ControllerTag { 364 return errors.Errorf("unexpected target controller UUID (got %s, expected %s)", 365 targetConn.ControllerTag(), status.TargetInfo.ControllerTag) 366 } 367 targetClient := migrationtarget.NewClient(targetConn) 368 // If we have cross model relations to other models on this controller, 369 // we need to ensure the target controller is recent enough to process those. 370 if targetClient.BestFacadeVersion() < 2 { 371 _, localRelatedModels, err := w.config.Facade.SourceControllerInfo() 372 if err != nil { 373 return errors.Annotate(err, "cannot get local model info") 374 } 375 if len(localRelatedModels) > 0 { 376 return errors.Errorf(incompatibleTargetMessage) 377 } 378 } 379 err = targetClient.Prechecks(model) 380 return errors.Annotate(err, "target prechecks failed") 381 } 382 383 func (w *Worker) doIMPORT(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) { 384 err := w.transferModel(targetInfo, modelUUID) 385 if err != nil { 386 w.setErrorStatus("model data transfer failed, %v", err) 387 return coremigration.ABORT, nil 388 } 389 return coremigration.PROCESSRELATIONS, nil 390 } 391 392 type uploadWrapper struct { 393 client *migrationtarget.Client 394 modelUUID string 395 } 396 397 // UploadTools prepends the model UUID to the args passed to the migration client. 398 func (w *uploadWrapper) UploadTools(r io.ReadSeeker, vers version.Binary) (tools.List, error) { 399 return w.client.UploadTools(w.modelUUID, r, vers) 400 } 401 402 // UploadCharm prepends the model UUID to the args passed to the migration client. 403 func (w *uploadWrapper) UploadCharm(curl string, charmRef string, content io.ReadSeeker) (string, error) { 404 return w.client.UploadCharm(w.modelUUID, curl, charmRef, content) 405 } 406 407 // UploadResource prepends the model UUID to the args passed to the migration client. 408 func (w *uploadWrapper) UploadResource(res resources.Resource, content io.ReadSeeker) error { 409 return w.client.UploadResource(w.modelUUID, res, content) 410 } 411 412 // SetPlaceholderResource prepends the model UUID to the args passed to the migration client. 413 func (w *uploadWrapper) SetPlaceholderResource(res resources.Resource) error { 414 return w.client.SetPlaceholderResource(w.modelUUID, res) 415 } 416 417 // SetUnitResource prepends the model UUID to the args passed to the migration client. 418 func (w *uploadWrapper) SetUnitResource(unitName string, res resources.Resource) error { 419 return w.client.SetUnitResource(w.modelUUID, unitName, res) 420 } 421 422 func (w *Worker) transferModel(targetInfo coremigration.TargetInfo, modelUUID string) error { 423 w.setInfoStatus("exporting model") 424 serialized, err := w.config.Facade.Export() 425 if err != nil { 426 return errors.Annotate(err, "model export failed") 427 } 428 429 w.setInfoStatus("importing model into target controller") 430 conn, err := w.openAPIConn(targetInfo) 431 if err != nil { 432 return errors.Annotate(err, "failed to connect to target controller") 433 } 434 defer conn.Close() 435 targetClient := migrationtarget.NewClient(conn) 436 err = targetClient.Import(serialized.Bytes) 437 if err != nil { 438 return errors.Annotate(err, "failed to import model into target controller") 439 } 440 441 if wrench.IsActive("migrationmaster", "die-in-export") { 442 // Simulate a abort causing failure to test last status not over written. 443 return errors.New("wrench in the transferModel works") 444 } 445 446 w.setInfoStatus("uploading model binaries into target controller") 447 wrapper := &uploadWrapper{targetClient, modelUUID} 448 err = w.config.UploadBinaries(migration.UploadBinariesConfig{ 449 Charms: serialized.Charms, 450 CharmDownloader: w.config.CharmDownloader, 451 CharmUploader: wrapper, 452 453 Tools: serialized.Tools, 454 ToolsDownloader: w.config.ToolsDownloader, 455 ToolsUploader: wrapper, 456 457 Resources: serialized.Resources, 458 ResourceDownloader: w.config.Facade, 459 ResourceUploader: wrapper, 460 }) 461 return errors.Annotate(err, "failed to migrate binaries") 462 } 463 464 func (w *Worker) doPROCESSRELATIONS(status coremigration.MigrationStatus) (coremigration.Phase, error) { 465 err := w.processRelations(status.TargetInfo, status.ModelUUID) 466 if err != nil { 467 w.setErrorStatus("processing relations failed, %v", err) 468 return coremigration.ABORT, nil 469 } 470 return coremigration.VALIDATION, nil 471 } 472 473 func (w *Worker) processRelations(targetInfo coremigration.TargetInfo, modelUUID string) error { 474 w.setInfoStatus("processing relations") 475 err := w.config.Facade.ProcessRelations(targetInfo.ControllerAlias) 476 if err != nil { 477 return errors.Annotate(err, "processing relations failed") 478 } 479 return nil 480 } 481 482 func (w *Worker) doVALIDATION(status coremigration.MigrationStatus) (coremigration.Phase, error) { 483 // Wait for agents to complete their validation checks. 484 ok, err := w.waitForMinions(status, failFast, "validating") 485 if err != nil { 486 return coremigration.UNKNOWN, errors.Trace(err) 487 } 488 if !ok { 489 return coremigration.ABORT, nil 490 } 491 492 client, closer, err := w.openTargetAPI(status.TargetInfo) 493 if err != nil { 494 return coremigration.UNKNOWN, errors.Trace(err) 495 } 496 defer func() { _ = closer() }() 497 498 // Check that the provider and target controller agree about what 499 // machines belong to the migrated model. 500 ok, err = w.checkTargetMachines(client, status.ModelUUID) 501 if err != nil { 502 return coremigration.UNKNOWN, errors.Trace(err) 503 } 504 if !ok { 505 return coremigration.ABORT, nil 506 } 507 508 // Once all agents have validated, activate the model in the 509 // target controller. 510 err = w.activateModel(client, status.ModelUUID) 511 if err != nil { 512 w.setErrorStatus("model activation failed, %v", err) 513 return coremigration.ABORT, nil 514 } 515 return coremigration.SUCCESS, nil 516 } 517 518 func (w *Worker) checkTargetMachines(targetClient *migrationtarget.Client, modelUUID string) (bool, error) { 519 w.setInfoStatus("checking machines in migrated model") 520 results, err := targetClient.CheckMachines(modelUUID) 521 if err != nil { 522 return false, errors.Trace(err) 523 } 524 if len(results) > 0 { 525 for _, resultErr := range results { 526 w.logger.Errorf(resultErr.Error()) 527 } 528 plural := "s" 529 if len(results) == 1 { 530 plural = "" 531 } 532 w.setErrorStatus("machine sanity check failed, %d error%s found", len(results), plural) 533 return false, nil 534 } 535 return true, nil 536 } 537 538 func (w *Worker) activateModel(targetClient *migrationtarget.Client, modelUUID string) error { 539 w.setInfoStatus("activating model in target controller") 540 info, localRelatedModels, err := w.config.Facade.SourceControllerInfo() 541 if err != nil { 542 return errors.Trace(err) 543 } 544 return errors.Trace(targetClient.Activate(modelUUID, info, localRelatedModels)) 545 } 546 547 func (w *Worker) doSUCCESS(status coremigration.MigrationStatus) (coremigration.Phase, error) { 548 _, err := w.waitForMinions(status, waitForAll, "successful") 549 if err != nil { 550 return coremigration.UNKNOWN, errors.Trace(err) 551 } 552 err = w.transferResources(status.TargetInfo, status.ModelUUID) 553 if err != nil { 554 return coremigration.UNKNOWN, errors.Trace(err) 555 } 556 // There's no turning back from SUCCESS - any problems should have 557 // been picked up in VALIDATION. After the minion wait in the 558 // SUCCESS phase, the migration can only proceed to LOGTRANSFER. 559 return coremigration.LOGTRANSFER, nil 560 } 561 562 func (w *Worker) transferResources(targetInfo coremigration.TargetInfo, modelUUID string) error { 563 w.setInfoStatus("transferring ownership of cloud resources to target controller") 564 conn, err := w.openAPIConn(targetInfo) 565 if err != nil { 566 return errors.Trace(err) 567 } 568 defer conn.Close() 569 570 targetClient := migrationtarget.NewClient(conn) 571 err = targetClient.AdoptResources(modelUUID) 572 return errors.Trace(err) 573 } 574 575 func (w *Worker) doLOGTRANSFER(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) { 576 err := w.transferLogs(targetInfo, modelUUID) 577 if err != nil { 578 return coremigration.UNKNOWN, errors.Trace(err) 579 } 580 return coremigration.REAP, nil 581 } 582 583 func (w *Worker) transferLogs(targetInfo coremigration.TargetInfo, modelUUID string) error { 584 sent := 0 585 reportProgress := func(finished bool, sent int) { 586 verb := "transferring" 587 if finished { 588 verb = "transferred" 589 } 590 w.setInfoStatus("successful, %s logs to target controller (%d sent)", verb, sent) 591 } 592 reportProgress(false, sent) 593 594 conn, err := w.openAPIConn(targetInfo) 595 if err != nil { 596 return errors.Annotate(err, "connecting to target API") 597 } 598 targetClient := migrationtarget.NewClient(conn) 599 latestLogTime, err := targetClient.LatestLogTime(modelUUID) 600 if err != nil { 601 return errors.Annotate(err, "getting log start time") 602 } 603 604 if latestLogTime != utcZero { 605 w.logger.Debugf("log transfer was interrupted - restarting from %s", latestLogTime) 606 } 607 608 throwWrench := latestLogTime == utcZero && wrench.IsActive("migrationmaster", "die-after-500-log-messages") 609 610 ctx, cancel := context.WithCancel(context.TODO()) 611 defer cancel() 612 613 logSource, err := w.config.Facade.StreamModelLog(ctx, latestLogTime) 614 if err != nil { 615 return errors.Annotate(err, "opening source log stream") 616 } 617 618 logTarget, err := targetClient.OpenLogTransferStream(modelUUID) 619 if err != nil { 620 return errors.Annotate(err, "opening target log stream") 621 } 622 defer logTarget.Close() 623 624 clk := w.config.Clock 625 logProgress := clk.After(progressUpdateInterval) 626 627 for { 628 select { 629 case <-w.catacomb.Dying(): 630 return w.catacomb.ErrDying() 631 case msg, ok := <-logSource: 632 if !ok { 633 // The channel's been closed, we're finished! 634 reportProgress(true, sent) 635 return nil 636 } 637 err := logTarget.WriteJSON(params.LogRecord{ 638 Entity: msg.Entity, 639 Time: msg.Timestamp, 640 Module: msg.Module, 641 Location: msg.Location, 642 Level: msg.Severity, 643 Message: msg.Message, 644 Labels: msg.Labels, 645 }) 646 if err != nil { 647 return errors.Trace(err) 648 } 649 sent++ 650 651 if throwWrench && sent == 500 { 652 // Simulate a connection drop to test restartability. 653 return errors.New("wrench in the works") 654 } 655 case <-logProgress: 656 reportProgress(false, sent) 657 logProgress = clk.After(progressUpdateInterval) 658 } 659 } 660 } 661 662 func (w *Worker) doREAP() (coremigration.Phase, error) { 663 w.setInfoStatus("successful, removing model from source controller") 664 // NOTE(babbageclunk): Calling Reap will set the migration phase 665 // to DONE if successful - this avoids a race where this worker is 666 // killed by the model going away before it can update the phase 667 // itself. 668 err := w.config.Facade.Reap() 669 if err != nil { 670 w.setErrorStatus("removing exported model failed: %s", err.Error()) 671 return coremigration.REAPFAILED, nil 672 } 673 return coremigration.DONE, nil 674 } 675 676 func (w *Worker) doABORT(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) { 677 w.setInfoStatus("aborted, removing model from target controller: %s", w.lastFailure) 678 if err := w.removeImportedModel(targetInfo, modelUUID); err != nil { 679 // This isn't fatal. Removing the imported model is a best 680 // efforts attempt so just report the error and proceed. 681 w.logger.Warningf("failed to remove model from target controller, %v", err) 682 } 683 return coremigration.ABORTDONE, nil 684 } 685 686 func (w *Worker) removeImportedModel(targetInfo coremigration.TargetInfo, modelUUID string) error { 687 conn, err := w.openAPIConn(targetInfo) 688 if err != nil { 689 return errors.Trace(err) 690 } 691 defer conn.Close() 692 693 targetClient := migrationtarget.NewClient(conn) 694 err = targetClient.Abort(modelUUID) 695 return errors.Trace(err) 696 } 697 698 func (w *Worker) waitForActiveMigration() (coremigration.MigrationStatus, error) { 699 var empty coremigration.MigrationStatus 700 701 watcher, err := w.config.Facade.Watch() 702 if err != nil { 703 return empty, errors.Annotate(err, "watching for migration") 704 } 705 if err := w.catacomb.Add(watcher); err != nil { 706 return empty, errors.Trace(err) 707 } 708 defer watcher.Kill() 709 710 for { 711 select { 712 case <-w.catacomb.Dying(): 713 return empty, w.catacomb.ErrDying() 714 case <-watcher.Changes(): 715 } 716 717 status, err := w.config.Facade.MigrationStatus() 718 switch { 719 case params.IsCodeNotFound(err): 720 // There's never been a migration. 721 case err == nil && status.Phase.IsTerminal(): 722 // No migration in progress. 723 if modelHasMigrated(status.Phase) { 724 return empty, ErrMigrated 725 } 726 case err != nil: 727 return empty, errors.Annotate(err, "retrieving migration status") 728 default: 729 // Migration is in progress. 730 return status, nil 731 } 732 733 // While waiting for a migration, ensure the fortress is open. 734 if err := w.config.Guard.Unlock(); err != nil { 735 return empty, errors.Trace(err) 736 } 737 } 738 } 739 740 // Possible values for waitForMinion's waitPolicy argument. 741 const failFast = false // Stop waiting at first minion failure report 742 const waitForAll = true // Wait for all minion reports to arrive (or timeout) 743 744 func (w *Worker) waitForMinions( 745 status coremigration.MigrationStatus, 746 waitPolicy bool, 747 infoPrefix string, 748 ) (success bool, err error) { 749 clk := w.config.Clock 750 timeout := clk.After(w.minionReportTimeout) 751 752 w.setInfoStatus("%s, waiting for agents to report back", infoPrefix) 753 w.logger.Infof("waiting for agents to report back for migration phase %s (will wait up to %s)", 754 status.Phase, truncDuration(w.minionReportTimeout)) 755 756 watch, err := w.config.Facade.WatchMinionReports() 757 if err != nil { 758 return false, errors.Trace(err) 759 } 760 if err := w.catacomb.Add(watch); err != nil { 761 return false, errors.Trace(err) 762 } 763 764 logProgress := clk.After(progressUpdateInterval) 765 766 var reports coremigration.MinionReports 767 for { 768 select { 769 case <-w.catacomb.Dying(): 770 return false, w.catacomb.ErrDying() 771 772 case <-timeout: 773 w.logger.Errorf(formatMinionTimeout(reports, status, infoPrefix)) 774 w.setErrorStatus("%s, timed out waiting for agents to report", infoPrefix) 775 return false, nil 776 777 case <-watch.Changes(): 778 var err error 779 reports, err = w.config.Facade.MinionReports() 780 if err != nil { 781 return false, errors.Trace(err) 782 } 783 if err := validateMinionReports(reports, status); err != nil { 784 return false, errors.Trace(err) 785 } 786 w.logger.Debugf("migration minion reports:\n%s", pretty.Sprint(reports)) 787 failures := len(reports.FailedMachines) + len(reports.FailedUnits) + len(reports.FailedApplications) 788 if failures > 0 { 789 w.logger.Errorf(formatMinionFailure(reports, infoPrefix)) 790 w.setErrorStatus("%s, some agents reported failure", infoPrefix) 791 if waitPolicy == failFast { 792 return false, nil 793 } 794 } 795 if reports.UnknownCount == 0 { 796 msg := formatMinionWaitDone(reports, infoPrefix) 797 if failures > 0 { 798 w.logger.Errorf(msg) 799 w.setErrorStatus("%s, some agents reported failure", infoPrefix) 800 return false, nil 801 } 802 w.logger.Infof(msg) 803 w.setInfoStatus("%s, all agents reported success", infoPrefix) 804 return true, nil 805 } 806 807 case <-logProgress: 808 w.setInfoStatus("%s, %s", infoPrefix, formatMinionWaitUpdate(reports)) 809 logProgress = clk.After(progressUpdateInterval) 810 } 811 } 812 } 813 814 func truncDuration(d time.Duration) time.Duration { 815 return (d / time.Second) * time.Second 816 } 817 818 func validateMinionReports(reports coremigration.MinionReports, status coremigration.MigrationStatus) error { 819 if reports.MigrationId != status.MigrationId { 820 return errors.Errorf("unexpected migration id in minion reports, got %v, expected %v", 821 reports.MigrationId, status.MigrationId) 822 } 823 if reports.Phase != status.Phase { 824 return errors.Errorf("minion reports phase (%s) does not match migration phase (%s)", 825 reports.Phase, status.Phase) 826 } 827 return nil 828 } 829 830 func formatMinionTimeout( 831 reports coremigration.MinionReports, 832 status coremigration.MigrationStatus, 833 infoPrefix string, 834 ) string { 835 if reports.IsZero() { 836 return fmt.Sprintf("no agents reported in time") 837 } 838 839 var fails []string 840 if len(reports.SomeUnknownMachines) > 0 { 841 fails = append(fails, fmt.Sprintf("machines: %s", strings.Join(reports.SomeUnknownMachines, ","))) 842 } 843 if len(reports.SomeUnknownUnits) > 0 { 844 fails = append(fails, fmt.Sprintf("units: %s", strings.Join(reports.SomeUnknownUnits, ","))) 845 } 846 if len(reports.SomeUnknownApplications) > 0 { 847 fails = append(fails, fmt.Sprintf("applications: %s", strings.Join(reports.SomeUnknownApplications, ","))) 848 } 849 return fmt.Sprintf("%d agents failed to report in time for %q phase (including %s)", 850 reports.UnknownCount, infoPrefix, strings.Join(fails, "; ")) 851 } 852 853 func formatMinionFailure(reports coremigration.MinionReports, infoPrefix string) string { 854 var fails []string 855 if len(reports.FailedMachines) > 0 { 856 fails = append(fails, fmt.Sprintf("machines: %s", strings.Join(reports.FailedMachines, ","))) 857 } 858 if len(reports.FailedUnits) > 0 { 859 fails = append(fails, fmt.Sprintf("units: %s", strings.Join(reports.FailedUnits, ","))) 860 } 861 if len(reports.FailedApplications) > 0 { 862 fails = append(fails, fmt.Sprintf("applications: %s", strings.Join(reports.FailedApplications, ","))) 863 } 864 return fmt.Sprintf("agents failed phase %q (%s)", infoPrefix, strings.Join(fails, "; ")) 865 } 866 867 func formatMinionWaitDone(reports coremigration.MinionReports, infoPrefix string) string { 868 return fmt.Sprintf("completed waiting for agents to report for %q, %d succeeded, %d failed", 869 infoPrefix, reports.SuccessCount, len(reports.FailedMachines)+len(reports.FailedUnits)+len(reports.FailedApplications)) 870 } 871 872 func formatMinionWaitUpdate(reports coremigration.MinionReports) string { 873 if reports.IsZero() { 874 return fmt.Sprintf("no reports from agents yet") 875 } 876 877 msg := fmt.Sprintf("waiting for agents to report back: %d succeeded, %d still to report", 878 reports.SuccessCount, reports.UnknownCount) 879 failed := len(reports.FailedMachines) + len(reports.FailedUnits) + len(reports.FailedApplications) 880 if failed > 0 { 881 msg += fmt.Sprintf(", %d failed", failed) 882 } 883 return msg 884 } 885 886 func (w *Worker) openTargetAPI(targetInfo coremigration.TargetInfo) (*migrationtarget.Client, func() error, error) { 887 conn, err := w.openAPIConn(targetInfo) 888 if err != nil { 889 return nil, nil, errors.Trace(err) 890 } 891 return migrationtarget.NewClient(conn), conn.Close, nil 892 } 893 894 func (w *Worker) openAPIConn(targetInfo coremigration.TargetInfo) (api.Connection, error) { 895 return w.openAPIConnForModel(targetInfo, "") 896 } 897 898 func (w *Worker) openAPIConnForModel(targetInfo coremigration.TargetInfo, modelUUID string) (api.Connection, error) { 899 apiInfo := &api.Info{ 900 Addrs: targetInfo.Addrs, 901 CACert: targetInfo.CACert, 902 Password: targetInfo.Password, 903 ModelTag: names.NewModelTag(modelUUID), 904 Macaroons: targetInfo.Macaroons, 905 } 906 // Only local users must be added to the api info. 907 // For external users, the tag needs to be left empty. 908 if targetInfo.AuthTag.IsLocal() { 909 apiInfo.Tag = targetInfo.AuthTag 910 } 911 return w.config.APIOpen(apiInfo, migration.ControllerDialOpts()) 912 } 913 914 func modelHasMigrated(phase coremigration.Phase) bool { 915 return phase == coremigration.DONE || phase == coremigration.REAPFAILED 916 }