github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/migrationmaster/worker.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package migrationmaster
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"io"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/juju/clock"
    14  	"github.com/juju/errors"
    15  	"github.com/juju/loggo"
    16  	"github.com/juju/names/v5"
    17  	"github.com/juju/version/v2"
    18  	"github.com/juju/worker/v3/catacomb"
    19  	"github.com/kr/pretty"
    20  
    21  	"github.com/juju/juju/api"
    22  	"github.com/juju/juju/api/common"
    23  	"github.com/juju/juju/api/controller/migrationtarget"
    24  	corelogger "github.com/juju/juju/core/logger"
    25  	coremigration "github.com/juju/juju/core/migration"
    26  	"github.com/juju/juju/core/resources"
    27  	"github.com/juju/juju/core/watcher"
    28  	"github.com/juju/juju/migration"
    29  	"github.com/juju/juju/rpc/params"
    30  	"github.com/juju/juju/tools"
    31  	"github.com/juju/juju/worker/fortress"
    32  	"github.com/juju/juju/wrench"
    33  )
    34  
    35  var (
    36  	// ErrInactive is returned when the migration is no longer active
    37  	// (probably aborted). In this case the migrationmaster should be
    38  	// restarted so that it can wait for the next migration attempt.
    39  	ErrInactive = errors.New("migration is no longer active")
    40  
    41  	// ErrMigrated is returned when the model has migrated to another
    42  	// server. The migrationmaster should not be restarted again in
    43  	// this case.
    44  	ErrMigrated = errors.New("model has migrated")
    45  
    46  	// utcZero matches the deserialised zero times coming back from
    47  	// MigrationTarget.LatestLogTime, because they have a non-nil
    48  	// location.
    49  	utcZero = time.Time{}.In(time.UTC)
    50  )
    51  
    52  // progressUpdateInterval is the time between progress update
    53  // messages. It's used while the migrationmaster is waiting for
    54  // reports from minions and while it's transferring log messages
    55  // to the newly-migrated model.
    56  const progressUpdateInterval = 30 * time.Second
    57  
    58  // Facade exposes controller functionality to a Worker.
    59  type Facade interface {
    60  	// Watch returns a watcher which reports when a migration is
    61  	// active for the model associated with the API connection.
    62  	Watch() (watcher.NotifyWatcher, error)
    63  
    64  	// MigrationStatus returns the details and progress of the latest
    65  	// model migration.
    66  	MigrationStatus() (coremigration.MigrationStatus, error)
    67  
    68  	// SetPhase updates the phase of the currently active model
    69  	// migration.
    70  	SetPhase(coremigration.Phase) error
    71  
    72  	// SetStatusMessage sets a human readable message regarding the
    73  	// progress of a migration.
    74  	SetStatusMessage(string) error
    75  
    76  	// Prechecks performs pre-migration checks on the model and
    77  	// (source) controller.
    78  	Prechecks() error
    79  
    80  	// ModelInfo return basic information about the model to migrated.
    81  	ModelInfo() (coremigration.ModelInfo, error)
    82  
    83  	// SourceControllerInfo returns connection information about the source controller
    84  	// and uuids of any other hosted models involved in cross model relations.
    85  	SourceControllerInfo() (coremigration.SourceControllerInfo, []string, error)
    86  
    87  	// Export returns a serialized representation of the model
    88  	// associated with the API connection.
    89  	Export() (coremigration.SerializedModel, error)
    90  
    91  	// ProcessRelations runs a series of processes to ensure that the relations
    92  	// of a given model are correct after a migrated model.
    93  	ProcessRelations(string) error
    94  
    95  	// OpenResource downloads a single resource for an application.
    96  	OpenResource(string, string) (io.ReadCloser, error)
    97  
    98  	// Reap removes all documents of the model associated with the API
    99  	// connection.
   100  	Reap() error
   101  
   102  	// WatchMinionReports returns a watcher which reports when a migration
   103  	// minion has made a report for the current migration phase.
   104  	WatchMinionReports() (watcher.NotifyWatcher, error)
   105  
   106  	// MinionReports returns details of the reports made by migration
   107  	// minions to the controller for the current migration phase.
   108  	MinionReports() (coremigration.MinionReports, error)
   109  
   110  	// MinionReportTimeout returns the maximum time to wait for minion workers
   111  	// to report on a migration phase.
   112  	MinionReportTimeout() (time.Duration, error)
   113  
   114  	// StreamModelLog takes a starting time and returns a channel that
   115  	// will yield the logs on or after that time - these are the logs
   116  	// that need to be transferred to the target after the migration
   117  	// is successful.
   118  	StreamModelLog(context.Context, time.Time) (<-chan common.LogMessage, error)
   119  }
   120  
   121  // Config defines the operation of a Worker.
   122  type Config struct {
   123  	ModelUUID       string
   124  	Facade          Facade
   125  	Guard           fortress.Guard
   126  	APIOpen         func(*api.Info, api.DialOpts) (api.Connection, error)
   127  	UploadBinaries  func(migration.UploadBinariesConfig) error
   128  	CharmDownloader migration.CharmDownloader
   129  	ToolsDownloader migration.ToolsDownloader
   130  	Clock           clock.Clock
   131  }
   132  
   133  // Validate returns an error if config cannot drive a Worker.
   134  func (config Config) Validate() error {
   135  	if !names.IsValidModel(config.ModelUUID) {
   136  		return errors.NotValidf("model UUID %q", config.ModelUUID)
   137  	}
   138  	if config.Facade == nil {
   139  		return errors.NotValidf("nil Facade")
   140  	}
   141  	if config.Guard == nil {
   142  		return errors.NotValidf("nil Guard")
   143  	}
   144  	if config.APIOpen == nil {
   145  		return errors.NotValidf("nil APIOpen")
   146  	}
   147  	if config.UploadBinaries == nil {
   148  		return errors.NotValidf("nil UploadBinaries")
   149  	}
   150  	if config.CharmDownloader == nil {
   151  		return errors.NotValidf("nil CharmDownloader")
   152  	}
   153  	if config.ToolsDownloader == nil {
   154  		return errors.NotValidf("nil ToolsDownloader")
   155  	}
   156  	if config.Clock == nil {
   157  		return errors.NotValidf("nil Clock")
   158  	}
   159  	return nil
   160  }
   161  
   162  // New returns a Worker backed by config, or an error.
   163  func New(config Config) (*Worker, error) {
   164  	if err := config.Validate(); err != nil {
   165  		return nil, errors.Trace(err)
   166  	}
   167  
   168  	// Soon we will get model specific logs generated in the
   169  	// controller logged against the model. Until then, distinguish
   170  	// the logs from different migrationmaster insteads using the short
   171  	// model UUID suffix.
   172  	loggerName := "juju.worker.migrationmaster." + names.NewModelTag(config.ModelUUID).ShortId()
   173  	logger := loggo.GetLoggerWithLabels(loggerName, corelogger.MIGRATION)
   174  
   175  	w := &Worker{
   176  		config: config,
   177  		logger: logger,
   178  	}
   179  	err := catacomb.Invoke(catacomb.Plan{
   180  		Site: &w.catacomb,
   181  		Work: w.run,
   182  	})
   183  	if err != nil {
   184  		return nil, errors.Trace(err)
   185  	}
   186  	return w, nil
   187  }
   188  
   189  // Worker waits until a migration is active and its configured
   190  // Fortress is locked down, and then orchestrates a model migration.
   191  type Worker struct {
   192  	catacomb            catacomb.Catacomb
   193  	config              Config
   194  	logger              loggo.Logger
   195  	lastFailure         string
   196  	minionReportTimeout time.Duration
   197  }
   198  
   199  // Kill implements worker.Worker.
   200  func (w *Worker) Kill() {
   201  	w.catacomb.Kill(nil)
   202  }
   203  
   204  // Wait implements worker.Worker.
   205  func (w *Worker) Wait() error {
   206  	return w.catacomb.Wait()
   207  }
   208  
   209  func (w *Worker) run() error {
   210  	status, err := w.waitForActiveMigration()
   211  	if err != nil {
   212  		return errors.Trace(err)
   213  	}
   214  
   215  	err = w.config.Guard.Lockdown(w.catacomb.Dying())
   216  	if errors.Cause(err) == fortress.ErrAborted {
   217  		return w.catacomb.ErrDying()
   218  	} else if err != nil {
   219  		return errors.Trace(err)
   220  	}
   221  
   222  	if w.minionReportTimeout, err = w.config.Facade.MinionReportTimeout(); err != nil {
   223  		return errors.Trace(err)
   224  	}
   225  
   226  	phase := status.Phase
   227  
   228  	for {
   229  		var err error
   230  		switch phase {
   231  		case coremigration.QUIESCE:
   232  			phase, err = w.doQUIESCE(status)
   233  		case coremigration.IMPORT:
   234  			phase, err = w.doIMPORT(status.TargetInfo, status.ModelUUID)
   235  		case coremigration.PROCESSRELATIONS:
   236  			phase, err = w.doPROCESSRELATIONS(status)
   237  		case coremigration.VALIDATION:
   238  			phase, err = w.doVALIDATION(status)
   239  		case coremigration.SUCCESS:
   240  			phase, err = w.doSUCCESS(status)
   241  		case coremigration.LOGTRANSFER:
   242  			phase, err = w.doLOGTRANSFER(status.TargetInfo, status.ModelUUID)
   243  		case coremigration.REAP:
   244  			phase, err = w.doREAP()
   245  		case coremigration.ABORT:
   246  			phase, err = w.doABORT(status.TargetInfo, status.ModelUUID)
   247  		default:
   248  			return errors.Errorf("unknown phase: %v [%d]", phase.String(), phase)
   249  		}
   250  
   251  		if err != nil {
   252  			// A phase handler should only return an error if the
   253  			// migration master should exit. In the face of other
   254  			// errors the handler should log the problem and then
   255  			// return the appropriate error phase to transition to -
   256  			// i.e. ABORT or REAPFAILED)
   257  			return errors.Trace(err)
   258  		}
   259  
   260  		if w.killed() {
   261  			return w.catacomb.ErrDying()
   262  		}
   263  
   264  		w.logger.Infof("setting migration phase to %s", phase)
   265  		if err := w.config.Facade.SetPhase(phase); err != nil {
   266  			return errors.Annotate(err, "failed to set phase")
   267  		}
   268  		status.Phase = phase
   269  
   270  		if modelHasMigrated(phase) {
   271  			return ErrMigrated
   272  		} else if phase.IsTerminal() {
   273  			// Some other terminal phase (aborted), exit and try
   274  			// again.
   275  			return ErrInactive
   276  		}
   277  	}
   278  }
   279  
   280  func (w *Worker) killed() bool {
   281  	select {
   282  	case <-w.catacomb.Dying():
   283  		return true
   284  	default:
   285  		return false
   286  	}
   287  }
   288  
   289  func (w *Worker) setInfoStatus(s string, a ...interface{}) {
   290  	w.setStatusAndLog(w.logger.Infof, s, a...)
   291  }
   292  
   293  func (w *Worker) setErrorStatus(s string, a ...interface{}) {
   294  	w.lastFailure = fmt.Sprintf(s, a...)
   295  	w.setStatusAndLog(w.logger.Errorf, s, a...)
   296  }
   297  
   298  func (w *Worker) setStatusAndLog(log func(string, ...interface{}), s string, a ...interface{}) {
   299  	message := fmt.Sprintf(s, a...)
   300  	log(message)
   301  	if err := w.setStatus(message); err != nil {
   302  		// Setting status isn't critical. If it fails, just logging
   303  		// the problem here and not passing it upstream makes things a
   304  		// lot clearer in the caller.
   305  		w.logger.Errorf("%s", err)
   306  	}
   307  }
   308  
   309  func (w *Worker) setStatus(message string) error {
   310  	err := w.config.Facade.SetStatusMessage(message)
   311  	return errors.Annotate(err, "failed to set status message")
   312  }
   313  
   314  func (w *Worker) doQUIESCE(status coremigration.MigrationStatus) (coremigration.Phase, error) {
   315  	// Run prechecks before waiting for minions to report back. This
   316  	// short-circuits the long timeout in the case of an agent being
   317  	// down.
   318  	if err := w.prechecks(status); err != nil {
   319  		w.setErrorStatus(err.Error())
   320  		return coremigration.ABORT, nil
   321  	}
   322  
   323  	ok, err := w.waitForMinions(status, failFast, "quiescing")
   324  	if err != nil {
   325  		return coremigration.UNKNOWN, errors.Trace(err)
   326  	}
   327  	if !ok {
   328  		return coremigration.ABORT, nil
   329  	}
   330  
   331  	// Now that the model is stable, run the prechecks again.
   332  	if err := w.prechecks(status); err != nil {
   333  		w.setErrorStatus(err.Error())
   334  		return coremigration.ABORT, nil
   335  	}
   336  
   337  	return coremigration.IMPORT, nil
   338  }
   339  
   340  var incompatibleTargetMessage = `
   341  target controller must be upgraded to 2.9.43 or later
   342  to be able to migrate models with cross model relations
   343  to other models hosted on the source controller
   344  `[1:]
   345  
   346  func (w *Worker) prechecks(status coremigration.MigrationStatus) error {
   347  	model, err := w.config.Facade.ModelInfo()
   348  	if err != nil {
   349  		return errors.Annotate(err, "failed to obtain model info during prechecks")
   350  	}
   351  
   352  	w.setInfoStatus("performing source prechecks")
   353  	if err := w.config.Facade.Prechecks(); err != nil {
   354  		return errors.Annotate(err, "source prechecks failed")
   355  	}
   356  
   357  	w.setInfoStatus("performing target prechecks")
   358  	targetConn, err := w.openAPIConn(status.TargetInfo)
   359  	if err != nil {
   360  		return errors.Annotate(err, "failed to connect to target controller during prechecks")
   361  	}
   362  	defer targetConn.Close()
   363  	if targetConn.ControllerTag() != status.TargetInfo.ControllerTag {
   364  		return errors.Errorf("unexpected target controller UUID (got %s, expected %s)",
   365  			targetConn.ControllerTag(), status.TargetInfo.ControllerTag)
   366  	}
   367  	targetClient := migrationtarget.NewClient(targetConn)
   368  	// If we have cross model relations to other models on this controller,
   369  	// we need to ensure the target controller is recent enough to process those.
   370  	if targetClient.BestFacadeVersion() < 2 {
   371  		_, localRelatedModels, err := w.config.Facade.SourceControllerInfo()
   372  		if err != nil {
   373  			return errors.Annotate(err, "cannot get local model info")
   374  		}
   375  		if len(localRelatedModels) > 0 {
   376  			return errors.Errorf(incompatibleTargetMessage)
   377  		}
   378  	}
   379  	err = targetClient.Prechecks(model)
   380  	return errors.Annotate(err, "target prechecks failed")
   381  }
   382  
   383  func (w *Worker) doIMPORT(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) {
   384  	err := w.transferModel(targetInfo, modelUUID)
   385  	if err != nil {
   386  		w.setErrorStatus("model data transfer failed, %v", err)
   387  		return coremigration.ABORT, nil
   388  	}
   389  	return coremigration.PROCESSRELATIONS, nil
   390  }
   391  
   392  type uploadWrapper struct {
   393  	client    *migrationtarget.Client
   394  	modelUUID string
   395  }
   396  
   397  // UploadTools prepends the model UUID to the args passed to the migration client.
   398  func (w *uploadWrapper) UploadTools(r io.ReadSeeker, vers version.Binary) (tools.List, error) {
   399  	return w.client.UploadTools(w.modelUUID, r, vers)
   400  }
   401  
   402  // UploadCharm prepends the model UUID to the args passed to the migration client.
   403  func (w *uploadWrapper) UploadCharm(curl string, charmRef string, content io.ReadSeeker) (string, error) {
   404  	return w.client.UploadCharm(w.modelUUID, curl, charmRef, content)
   405  }
   406  
   407  // UploadResource prepends the model UUID to the args passed to the migration client.
   408  func (w *uploadWrapper) UploadResource(res resources.Resource, content io.ReadSeeker) error {
   409  	return w.client.UploadResource(w.modelUUID, res, content)
   410  }
   411  
   412  // SetPlaceholderResource prepends the model UUID to the args passed to the migration client.
   413  func (w *uploadWrapper) SetPlaceholderResource(res resources.Resource) error {
   414  	return w.client.SetPlaceholderResource(w.modelUUID, res)
   415  }
   416  
   417  // SetUnitResource prepends the model UUID to the args passed to the migration client.
   418  func (w *uploadWrapper) SetUnitResource(unitName string, res resources.Resource) error {
   419  	return w.client.SetUnitResource(w.modelUUID, unitName, res)
   420  }
   421  
   422  func (w *Worker) transferModel(targetInfo coremigration.TargetInfo, modelUUID string) error {
   423  	w.setInfoStatus("exporting model")
   424  	serialized, err := w.config.Facade.Export()
   425  	if err != nil {
   426  		return errors.Annotate(err, "model export failed")
   427  	}
   428  
   429  	w.setInfoStatus("importing model into target controller")
   430  	conn, err := w.openAPIConn(targetInfo)
   431  	if err != nil {
   432  		return errors.Annotate(err, "failed to connect to target controller")
   433  	}
   434  	defer conn.Close()
   435  	targetClient := migrationtarget.NewClient(conn)
   436  	err = targetClient.Import(serialized.Bytes)
   437  	if err != nil {
   438  		return errors.Annotate(err, "failed to import model into target controller")
   439  	}
   440  
   441  	if wrench.IsActive("migrationmaster", "die-in-export") {
   442  		// Simulate a abort causing failure to test last status not over written.
   443  		return errors.New("wrench in the transferModel works")
   444  	}
   445  
   446  	w.setInfoStatus("uploading model binaries into target controller")
   447  	wrapper := &uploadWrapper{targetClient, modelUUID}
   448  	err = w.config.UploadBinaries(migration.UploadBinariesConfig{
   449  		Charms:          serialized.Charms,
   450  		CharmDownloader: w.config.CharmDownloader,
   451  		CharmUploader:   wrapper,
   452  
   453  		Tools:           serialized.Tools,
   454  		ToolsDownloader: w.config.ToolsDownloader,
   455  		ToolsUploader:   wrapper,
   456  
   457  		Resources:          serialized.Resources,
   458  		ResourceDownloader: w.config.Facade,
   459  		ResourceUploader:   wrapper,
   460  	})
   461  	return errors.Annotate(err, "failed to migrate binaries")
   462  }
   463  
   464  func (w *Worker) doPROCESSRELATIONS(status coremigration.MigrationStatus) (coremigration.Phase, error) {
   465  	err := w.processRelations(status.TargetInfo, status.ModelUUID)
   466  	if err != nil {
   467  		w.setErrorStatus("processing relations failed, %v", err)
   468  		return coremigration.ABORT, nil
   469  	}
   470  	return coremigration.VALIDATION, nil
   471  }
   472  
   473  func (w *Worker) processRelations(targetInfo coremigration.TargetInfo, modelUUID string) error {
   474  	w.setInfoStatus("processing relations")
   475  	err := w.config.Facade.ProcessRelations(targetInfo.ControllerAlias)
   476  	if err != nil {
   477  		return errors.Annotate(err, "processing relations failed")
   478  	}
   479  	return nil
   480  }
   481  
   482  func (w *Worker) doVALIDATION(status coremigration.MigrationStatus) (coremigration.Phase, error) {
   483  	// Wait for agents to complete their validation checks.
   484  	ok, err := w.waitForMinions(status, failFast, "validating")
   485  	if err != nil {
   486  		return coremigration.UNKNOWN, errors.Trace(err)
   487  	}
   488  	if !ok {
   489  		return coremigration.ABORT, nil
   490  	}
   491  
   492  	client, closer, err := w.openTargetAPI(status.TargetInfo)
   493  	if err != nil {
   494  		return coremigration.UNKNOWN, errors.Trace(err)
   495  	}
   496  	defer func() { _ = closer() }()
   497  
   498  	// Check that the provider and target controller agree about what
   499  	// machines belong to the migrated model.
   500  	ok, err = w.checkTargetMachines(client, status.ModelUUID)
   501  	if err != nil {
   502  		return coremigration.UNKNOWN, errors.Trace(err)
   503  	}
   504  	if !ok {
   505  		return coremigration.ABORT, nil
   506  	}
   507  
   508  	// Once all agents have validated, activate the model in the
   509  	// target controller.
   510  	err = w.activateModel(client, status.ModelUUID)
   511  	if err != nil {
   512  		w.setErrorStatus("model activation failed, %v", err)
   513  		return coremigration.ABORT, nil
   514  	}
   515  	return coremigration.SUCCESS, nil
   516  }
   517  
   518  func (w *Worker) checkTargetMachines(targetClient *migrationtarget.Client, modelUUID string) (bool, error) {
   519  	w.setInfoStatus("checking machines in migrated model")
   520  	results, err := targetClient.CheckMachines(modelUUID)
   521  	if err != nil {
   522  		return false, errors.Trace(err)
   523  	}
   524  	if len(results) > 0 {
   525  		for _, resultErr := range results {
   526  			w.logger.Errorf(resultErr.Error())
   527  		}
   528  		plural := "s"
   529  		if len(results) == 1 {
   530  			plural = ""
   531  		}
   532  		w.setErrorStatus("machine sanity check failed, %d error%s found", len(results), plural)
   533  		return false, nil
   534  	}
   535  	return true, nil
   536  }
   537  
   538  func (w *Worker) activateModel(targetClient *migrationtarget.Client, modelUUID string) error {
   539  	w.setInfoStatus("activating model in target controller")
   540  	info, localRelatedModels, err := w.config.Facade.SourceControllerInfo()
   541  	if err != nil {
   542  		return errors.Trace(err)
   543  	}
   544  	return errors.Trace(targetClient.Activate(modelUUID, info, localRelatedModels))
   545  }
   546  
   547  func (w *Worker) doSUCCESS(status coremigration.MigrationStatus) (coremigration.Phase, error) {
   548  	_, err := w.waitForMinions(status, waitForAll, "successful")
   549  	if err != nil {
   550  		return coremigration.UNKNOWN, errors.Trace(err)
   551  	}
   552  	err = w.transferResources(status.TargetInfo, status.ModelUUID)
   553  	if err != nil {
   554  		return coremigration.UNKNOWN, errors.Trace(err)
   555  	}
   556  	// There's no turning back from SUCCESS - any problems should have
   557  	// been picked up in VALIDATION. After the minion wait in the
   558  	// SUCCESS phase, the migration can only proceed to LOGTRANSFER.
   559  	return coremigration.LOGTRANSFER, nil
   560  }
   561  
   562  func (w *Worker) transferResources(targetInfo coremigration.TargetInfo, modelUUID string) error {
   563  	w.setInfoStatus("transferring ownership of cloud resources to target controller")
   564  	conn, err := w.openAPIConn(targetInfo)
   565  	if err != nil {
   566  		return errors.Trace(err)
   567  	}
   568  	defer conn.Close()
   569  
   570  	targetClient := migrationtarget.NewClient(conn)
   571  	err = targetClient.AdoptResources(modelUUID)
   572  	return errors.Trace(err)
   573  }
   574  
   575  func (w *Worker) doLOGTRANSFER(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) {
   576  	err := w.transferLogs(targetInfo, modelUUID)
   577  	if err != nil {
   578  		return coremigration.UNKNOWN, errors.Trace(err)
   579  	}
   580  	return coremigration.REAP, nil
   581  }
   582  
   583  func (w *Worker) transferLogs(targetInfo coremigration.TargetInfo, modelUUID string) error {
   584  	sent := 0
   585  	reportProgress := func(finished bool, sent int) {
   586  		verb := "transferring"
   587  		if finished {
   588  			verb = "transferred"
   589  		}
   590  		w.setInfoStatus("successful, %s logs to target controller (%d sent)", verb, sent)
   591  	}
   592  	reportProgress(false, sent)
   593  
   594  	conn, err := w.openAPIConn(targetInfo)
   595  	if err != nil {
   596  		return errors.Annotate(err, "connecting to target API")
   597  	}
   598  	targetClient := migrationtarget.NewClient(conn)
   599  	latestLogTime, err := targetClient.LatestLogTime(modelUUID)
   600  	if err != nil {
   601  		return errors.Annotate(err, "getting log start time")
   602  	}
   603  
   604  	if latestLogTime != utcZero {
   605  		w.logger.Debugf("log transfer was interrupted - restarting from %s", latestLogTime)
   606  	}
   607  
   608  	throwWrench := latestLogTime == utcZero && wrench.IsActive("migrationmaster", "die-after-500-log-messages")
   609  
   610  	ctx, cancel := context.WithCancel(context.TODO())
   611  	defer cancel()
   612  
   613  	logSource, err := w.config.Facade.StreamModelLog(ctx, latestLogTime)
   614  	if err != nil {
   615  		return errors.Annotate(err, "opening source log stream")
   616  	}
   617  
   618  	logTarget, err := targetClient.OpenLogTransferStream(modelUUID)
   619  	if err != nil {
   620  		return errors.Annotate(err, "opening target log stream")
   621  	}
   622  	defer logTarget.Close()
   623  
   624  	clk := w.config.Clock
   625  	logProgress := clk.After(progressUpdateInterval)
   626  
   627  	for {
   628  		select {
   629  		case <-w.catacomb.Dying():
   630  			return w.catacomb.ErrDying()
   631  		case msg, ok := <-logSource:
   632  			if !ok {
   633  				// The channel's been closed, we're finished!
   634  				reportProgress(true, sent)
   635  				return nil
   636  			}
   637  			err := logTarget.WriteJSON(params.LogRecord{
   638  				Entity:   msg.Entity,
   639  				Time:     msg.Timestamp,
   640  				Module:   msg.Module,
   641  				Location: msg.Location,
   642  				Level:    msg.Severity,
   643  				Message:  msg.Message,
   644  				Labels:   msg.Labels,
   645  			})
   646  			if err != nil {
   647  				return errors.Trace(err)
   648  			}
   649  			sent++
   650  
   651  			if throwWrench && sent == 500 {
   652  				// Simulate a connection drop to test restartability.
   653  				return errors.New("wrench in the works")
   654  			}
   655  		case <-logProgress:
   656  			reportProgress(false, sent)
   657  			logProgress = clk.After(progressUpdateInterval)
   658  		}
   659  	}
   660  }
   661  
   662  func (w *Worker) doREAP() (coremigration.Phase, error) {
   663  	w.setInfoStatus("successful, removing model from source controller")
   664  	// NOTE(babbageclunk): Calling Reap will set the migration phase
   665  	// to DONE if successful - this avoids a race where this worker is
   666  	// killed by the model going away before it can update the phase
   667  	// itself.
   668  	err := w.config.Facade.Reap()
   669  	if err != nil {
   670  		w.setErrorStatus("removing exported model failed: %s", err.Error())
   671  		return coremigration.REAPFAILED, nil
   672  	}
   673  	return coremigration.DONE, nil
   674  }
   675  
   676  func (w *Worker) doABORT(targetInfo coremigration.TargetInfo, modelUUID string) (coremigration.Phase, error) {
   677  	w.setInfoStatus("aborted, removing model from target controller: %s", w.lastFailure)
   678  	if err := w.removeImportedModel(targetInfo, modelUUID); err != nil {
   679  		// This isn't fatal. Removing the imported model is a best
   680  		// efforts attempt so just report the error and proceed.
   681  		w.logger.Warningf("failed to remove model from target controller, %v", err)
   682  	}
   683  	return coremigration.ABORTDONE, nil
   684  }
   685  
   686  func (w *Worker) removeImportedModel(targetInfo coremigration.TargetInfo, modelUUID string) error {
   687  	conn, err := w.openAPIConn(targetInfo)
   688  	if err != nil {
   689  		return errors.Trace(err)
   690  	}
   691  	defer conn.Close()
   692  
   693  	targetClient := migrationtarget.NewClient(conn)
   694  	err = targetClient.Abort(modelUUID)
   695  	return errors.Trace(err)
   696  }
   697  
   698  func (w *Worker) waitForActiveMigration() (coremigration.MigrationStatus, error) {
   699  	var empty coremigration.MigrationStatus
   700  
   701  	watcher, err := w.config.Facade.Watch()
   702  	if err != nil {
   703  		return empty, errors.Annotate(err, "watching for migration")
   704  	}
   705  	if err := w.catacomb.Add(watcher); err != nil {
   706  		return empty, errors.Trace(err)
   707  	}
   708  	defer watcher.Kill()
   709  
   710  	for {
   711  		select {
   712  		case <-w.catacomb.Dying():
   713  			return empty, w.catacomb.ErrDying()
   714  		case <-watcher.Changes():
   715  		}
   716  
   717  		status, err := w.config.Facade.MigrationStatus()
   718  		switch {
   719  		case params.IsCodeNotFound(err):
   720  			// There's never been a migration.
   721  		case err == nil && status.Phase.IsTerminal():
   722  			// No migration in progress.
   723  			if modelHasMigrated(status.Phase) {
   724  				return empty, ErrMigrated
   725  			}
   726  		case err != nil:
   727  			return empty, errors.Annotate(err, "retrieving migration status")
   728  		default:
   729  			// Migration is in progress.
   730  			return status, nil
   731  		}
   732  
   733  		// While waiting for a migration, ensure the fortress is open.
   734  		if err := w.config.Guard.Unlock(); err != nil {
   735  			return empty, errors.Trace(err)
   736  		}
   737  	}
   738  }
   739  
   740  // Possible values for waitForMinion's waitPolicy argument.
   741  const failFast = false  // Stop waiting at first minion failure report
   742  const waitForAll = true // Wait for all minion reports to arrive (or timeout)
   743  
   744  func (w *Worker) waitForMinions(
   745  	status coremigration.MigrationStatus,
   746  	waitPolicy bool,
   747  	infoPrefix string,
   748  ) (success bool, err error) {
   749  	clk := w.config.Clock
   750  	timeout := clk.After(w.minionReportTimeout)
   751  
   752  	w.setInfoStatus("%s, waiting for agents to report back", infoPrefix)
   753  	w.logger.Infof("waiting for agents to report back for migration phase %s (will wait up to %s)",
   754  		status.Phase, truncDuration(w.minionReportTimeout))
   755  
   756  	watch, err := w.config.Facade.WatchMinionReports()
   757  	if err != nil {
   758  		return false, errors.Trace(err)
   759  	}
   760  	if err := w.catacomb.Add(watch); err != nil {
   761  		return false, errors.Trace(err)
   762  	}
   763  
   764  	logProgress := clk.After(progressUpdateInterval)
   765  
   766  	var reports coremigration.MinionReports
   767  	for {
   768  		select {
   769  		case <-w.catacomb.Dying():
   770  			return false, w.catacomb.ErrDying()
   771  
   772  		case <-timeout:
   773  			w.logger.Errorf(formatMinionTimeout(reports, status, infoPrefix))
   774  			w.setErrorStatus("%s, timed out waiting for agents to report", infoPrefix)
   775  			return false, nil
   776  
   777  		case <-watch.Changes():
   778  			var err error
   779  			reports, err = w.config.Facade.MinionReports()
   780  			if err != nil {
   781  				return false, errors.Trace(err)
   782  			}
   783  			if err := validateMinionReports(reports, status); err != nil {
   784  				return false, errors.Trace(err)
   785  			}
   786  			w.logger.Debugf("migration minion reports:\n%s", pretty.Sprint(reports))
   787  			failures := len(reports.FailedMachines) + len(reports.FailedUnits) + len(reports.FailedApplications)
   788  			if failures > 0 {
   789  				w.logger.Errorf(formatMinionFailure(reports, infoPrefix))
   790  				w.setErrorStatus("%s, some agents reported failure", infoPrefix)
   791  				if waitPolicy == failFast {
   792  					return false, nil
   793  				}
   794  			}
   795  			if reports.UnknownCount == 0 {
   796  				msg := formatMinionWaitDone(reports, infoPrefix)
   797  				if failures > 0 {
   798  					w.logger.Errorf(msg)
   799  					w.setErrorStatus("%s, some agents reported failure", infoPrefix)
   800  					return false, nil
   801  				}
   802  				w.logger.Infof(msg)
   803  				w.setInfoStatus("%s, all agents reported success", infoPrefix)
   804  				return true, nil
   805  			}
   806  
   807  		case <-logProgress:
   808  			w.setInfoStatus("%s, %s", infoPrefix, formatMinionWaitUpdate(reports))
   809  			logProgress = clk.After(progressUpdateInterval)
   810  		}
   811  	}
   812  }
   813  
   814  func truncDuration(d time.Duration) time.Duration {
   815  	return (d / time.Second) * time.Second
   816  }
   817  
   818  func validateMinionReports(reports coremigration.MinionReports, status coremigration.MigrationStatus) error {
   819  	if reports.MigrationId != status.MigrationId {
   820  		return errors.Errorf("unexpected migration id in minion reports, got %v, expected %v",
   821  			reports.MigrationId, status.MigrationId)
   822  	}
   823  	if reports.Phase != status.Phase {
   824  		return errors.Errorf("minion reports phase (%s) does not match migration phase (%s)",
   825  			reports.Phase, status.Phase)
   826  	}
   827  	return nil
   828  }
   829  
   830  func formatMinionTimeout(
   831  	reports coremigration.MinionReports,
   832  	status coremigration.MigrationStatus,
   833  	infoPrefix string,
   834  ) string {
   835  	if reports.IsZero() {
   836  		return fmt.Sprintf("no agents reported in time")
   837  	}
   838  
   839  	var fails []string
   840  	if len(reports.SomeUnknownMachines) > 0 {
   841  		fails = append(fails, fmt.Sprintf("machines: %s", strings.Join(reports.SomeUnknownMachines, ",")))
   842  	}
   843  	if len(reports.SomeUnknownUnits) > 0 {
   844  		fails = append(fails, fmt.Sprintf("units: %s", strings.Join(reports.SomeUnknownUnits, ",")))
   845  	}
   846  	if len(reports.SomeUnknownApplications) > 0 {
   847  		fails = append(fails, fmt.Sprintf("applications: %s", strings.Join(reports.SomeUnknownApplications, ",")))
   848  	}
   849  	return fmt.Sprintf("%d agents failed to report in time for %q phase (including %s)",
   850  		reports.UnknownCount, infoPrefix, strings.Join(fails, "; "))
   851  }
   852  
   853  func formatMinionFailure(reports coremigration.MinionReports, infoPrefix string) string {
   854  	var fails []string
   855  	if len(reports.FailedMachines) > 0 {
   856  		fails = append(fails, fmt.Sprintf("machines: %s", strings.Join(reports.FailedMachines, ",")))
   857  	}
   858  	if len(reports.FailedUnits) > 0 {
   859  		fails = append(fails, fmt.Sprintf("units: %s", strings.Join(reports.FailedUnits, ",")))
   860  	}
   861  	if len(reports.FailedApplications) > 0 {
   862  		fails = append(fails, fmt.Sprintf("applications: %s", strings.Join(reports.FailedApplications, ",")))
   863  	}
   864  	return fmt.Sprintf("agents failed phase %q (%s)", infoPrefix, strings.Join(fails, "; "))
   865  }
   866  
   867  func formatMinionWaitDone(reports coremigration.MinionReports, infoPrefix string) string {
   868  	return fmt.Sprintf("completed waiting for agents to report for %q, %d succeeded, %d failed",
   869  		infoPrefix, reports.SuccessCount, len(reports.FailedMachines)+len(reports.FailedUnits)+len(reports.FailedApplications))
   870  }
   871  
   872  func formatMinionWaitUpdate(reports coremigration.MinionReports) string {
   873  	if reports.IsZero() {
   874  		return fmt.Sprintf("no reports from agents yet")
   875  	}
   876  
   877  	msg := fmt.Sprintf("waiting for agents to report back: %d succeeded, %d still to report",
   878  		reports.SuccessCount, reports.UnknownCount)
   879  	failed := len(reports.FailedMachines) + len(reports.FailedUnits) + len(reports.FailedApplications)
   880  	if failed > 0 {
   881  		msg += fmt.Sprintf(", %d failed", failed)
   882  	}
   883  	return msg
   884  }
   885  
   886  func (w *Worker) openTargetAPI(targetInfo coremigration.TargetInfo) (*migrationtarget.Client, func() error, error) {
   887  	conn, err := w.openAPIConn(targetInfo)
   888  	if err != nil {
   889  		return nil, nil, errors.Trace(err)
   890  	}
   891  	return migrationtarget.NewClient(conn), conn.Close, nil
   892  }
   893  
   894  func (w *Worker) openAPIConn(targetInfo coremigration.TargetInfo) (api.Connection, error) {
   895  	return w.openAPIConnForModel(targetInfo, "")
   896  }
   897  
   898  func (w *Worker) openAPIConnForModel(targetInfo coremigration.TargetInfo, modelUUID string) (api.Connection, error) {
   899  	apiInfo := &api.Info{
   900  		Addrs:     targetInfo.Addrs,
   901  		CACert:    targetInfo.CACert,
   902  		Password:  targetInfo.Password,
   903  		ModelTag:  names.NewModelTag(modelUUID),
   904  		Macaroons: targetInfo.Macaroons,
   905  	}
   906  	// Only local users must be added to the api info.
   907  	// For external users, the tag needs to be left empty.
   908  	if targetInfo.AuthTag.IsLocal() {
   909  		apiInfo.Tag = targetInfo.AuthTag
   910  	}
   911  	return w.config.APIOpen(apiInfo, migration.ControllerDialOpts())
   912  }
   913  
   914  func modelHasMigrated(phase coremigration.Phase) bool {
   915  	return phase == coremigration.DONE || phase == coremigration.REAPFAILED
   916  }