github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/worker/migrationmaster/worker.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package migrationmaster
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/loggo"
    11  
    12  	"github.com/juju/juju/api"
    13  	"github.com/juju/juju/api/migrationmaster"
    14  	"github.com/juju/juju/api/migrationtarget"
    15  	"github.com/juju/juju/apiserver/params"
    16  	"github.com/juju/juju/core/migration"
    17  	"github.com/juju/juju/watcher"
    18  	"github.com/juju/juju/worker/catacomb"
    19  	"github.com/juju/juju/worker/dependency"
    20  	"github.com/juju/juju/worker/fortress"
    21  )
    22  
var (
	// logger is the package-level logger, registered under the name
	// "juju.worker.migrationmaster".
	logger = loggo.GetLogger("juju.worker.migrationmaster")

	// apiOpen is the function openAPIConn uses to connect to the
	// target controller. It defaults to api.Open.
	// NOTE(review): presumably kept as a variable so that tests can
	// substitute it - confirm.
	apiOpen = api.Open

	// tempSuccessSleep is how long doSUCCESS pauses before moving on
	// from the SUCCESS phase.
	tempSuccessSleep = 10 * time.Second

	// ErrDoneForNow indicates a temporary issue was encountered and
	// that the worker should restart and retry.
	ErrDoneForNow = errors.New("done for now")
)
    32  
// Facade exposes controller functionality to a Worker. It is the
// Worker's only channel to the controller of the model being
// migrated; connections to the target controller are opened
// separately (see openAPIConn).
type Facade interface {

	// Watch returns a watcher which reports when a migration is
	// active for the model associated with the API connection.
	Watch() (watcher.NotifyWatcher, error)

	// GetMigrationStatus returns the details and progress of the
	// latest model migration.
	GetMigrationStatus() (migrationmaster.MigrationStatus, error)

	// SetPhase updates the phase of the currently active model
	// migration.
	SetPhase(migration.Phase) error

	// Export returns a serialized representation of the model
	// associated with the API connection.
	Export() ([]byte, error)
}
    52  
// Config defines the operation of a Worker. Both fields are required;
// see Validate.
type Config struct {
	// Facade is used to watch for migrations, read their status,
	// record phase changes and export the model.
	Facade Facade

	// Guard controls the fortress that the Worker locks down before
	// running migration phases and unlocks while no migration is
	// found.
	Guard fortress.Guard
}
    58  
    59  // Validate returns an error if config cannot drive a Worker.
    60  func (config Config) Validate() error {
    61  	if config.Facade == nil {
    62  		return errors.NotValidf("nil Facade")
    63  	}
    64  	if config.Guard == nil {
    65  		return errors.NotValidf("nil Guard")
    66  	}
    67  	return nil
    68  }
    69  
    70  // New returns a Worker backed by config, or an error.
    71  func New(config Config) (*Worker, error) {
    72  	if err := config.Validate(); err != nil {
    73  		return nil, errors.Trace(err)
    74  	}
    75  	w := &Worker{
    76  		config: config,
    77  	}
    78  	err := catacomb.Invoke(catacomb.Plan{
    79  		Site: &w.catacomb,
    80  		Work: w.run,
    81  	})
    82  	if err != nil {
    83  		return nil, errors.Trace(err)
    84  	}
    85  	return w, nil
    86  }
    87  
// Worker waits until a migration is active and its configured
// Fortress is locked down, and then orchestrates a model migration.
type Worker struct {
	// catacomb manages the worker's lifetime: New invokes run under
	// it, and Kill and Wait delegate to it.
	catacomb catacomb.Catacomb

	// config holds the validated dependencies supplied to New.
	config Config
}
    94  
// Kill implements worker.Worker. It asks the worker's catacomb to
// stop, supplying no error of its own.
func (w *Worker) Kill() {
	w.catacomb.Kill(nil)
}
    99  
// Wait implements worker.Worker. It returns the result of waiting on
// the worker's catacomb.
func (w *Worker) Wait() error {
	return w.catacomb.Wait()
}
   104  
   105  func (w *Worker) run() error {
   106  	status, err := w.waitForActiveMigration()
   107  	if err != nil {
   108  		return errors.Trace(err)
   109  	}
   110  
   111  	err = w.config.Guard.Lockdown(w.catacomb.Dying())
   112  	if errors.Cause(err) == fortress.ErrAborted {
   113  		return w.catacomb.ErrDying()
   114  	} else if err != nil {
   115  		return errors.Trace(err)
   116  	}
   117  
   118  	// TODO(mjs) - log messages should indicate the model name and
   119  	// UUID. Independent logger per migration master instance?
   120  
   121  	phase := status.Phase
   122  	for {
   123  		var err error
   124  		switch phase {
   125  		case migration.QUIESCE:
   126  			phase, err = w.doQUIESCE()
   127  		case migration.READONLY:
   128  			phase, err = w.doREADONLY()
   129  		case migration.PRECHECK:
   130  			phase, err = w.doPRECHECK()
   131  		case migration.IMPORT:
   132  			phase, err = w.doIMPORT(status.TargetInfo)
   133  		case migration.VALIDATION:
   134  			phase, err = w.doVALIDATION(status.TargetInfo, status.ModelUUID)
   135  		case migration.SUCCESS:
   136  			phase, err = w.doSUCCESS()
   137  		case migration.LOGTRANSFER:
   138  			phase, err = w.doLOGTRANSFER()
   139  		case migration.REAP:
   140  			phase, err = w.doREAP()
   141  		case migration.ABORT:
   142  			phase, err = w.doABORT(status.TargetInfo, status.ModelUUID)
   143  		default:
   144  			return errors.Errorf("unknown phase: %v [%d]", phase.String(), phase)
   145  		}
   146  
   147  		if err != nil {
   148  			// A phase handler should only return an error if the
   149  			// migration master should exit. In the face of other
   150  			// errors the handler should log the problem and then
   151  			// return the appropriate error phases to transition to -
   152  			// i.e. ABORT or REAPFAILED)
   153  			return errors.Trace(err)
   154  		}
   155  
   156  		if w.killed() {
   157  			return w.catacomb.ErrDying()
   158  		}
   159  
   160  		logger.Infof("setting migration phase to %s", phase)
   161  		if err := w.config.Facade.SetPhase(phase); err != nil {
   162  			return errors.Annotate(err, "failed to set phase")
   163  		}
   164  
   165  		if modelHasMigrated(phase) {
   166  			// TODO(mjs) - use manifold Filter so that the dep engine
   167  			// error types aren't required here.
   168  			return dependency.ErrUninstall
   169  		} else if phase.IsTerminal() {
   170  			// Some other terminal phase, exit and try again.
   171  			return ErrDoneForNow
   172  		}
   173  	}
   174  }
   175  
   176  func (w *Worker) killed() bool {
   177  	select {
   178  	case <-w.catacomb.Dying():
   179  		return true
   180  	default:
   181  		return false
   182  	}
   183  }
   184  
// doQUIESCE handles the QUIESCE phase. It is currently a placeholder
// that does no work and always advances the migration to READONLY.
func (w *Worker) doQUIESCE() (migration.Phase, error) {
	// TODO(mjs) - Wait for all agents to report back.
	return migration.READONLY, nil
}
   189  
// doREADONLY handles the READONLY phase. It is currently a placeholder
// that does no work and always advances the migration to PRECHECK.
func (w *Worker) doREADONLY() (migration.Phase, error) {
	// TODO(mjs) - To be implemented.
	return migration.PRECHECK, nil
}
   194  
// doPRECHECK handles the PRECHECK phase. It is currently a placeholder
// that does no work and always advances the migration to IMPORT.
func (w *Worker) doPRECHECK() (migration.Phase, error) {
	// TODO(mjs) - To be implemented.
	return migration.IMPORT, nil
}
   199  
   200  func (w *Worker) doIMPORT(targetInfo migration.TargetInfo) (migration.Phase, error) {
   201  	logger.Infof("exporting model")
   202  	bytes, err := w.config.Facade.Export()
   203  	if err != nil {
   204  		logger.Errorf("model export failed: %v", err)
   205  		return migration.ABORT, nil
   206  	}
   207  
   208  	logger.Infof("opening API connection to target controller")
   209  	conn, err := openAPIConn(targetInfo)
   210  	if err != nil {
   211  		logger.Errorf("failed to connect to target controller: %v", err)
   212  		return migration.ABORT, nil
   213  	}
   214  	defer conn.Close()
   215  
   216  	logger.Infof("importing model into target controller")
   217  	targetClient := migrationtarget.NewClient(conn)
   218  	err = targetClient.Import(bytes)
   219  	if err != nil {
   220  		logger.Errorf("failed to import model into target controller: %v", err)
   221  		return migration.ABORT, nil
   222  	}
   223  
   224  	return migration.VALIDATION, nil
   225  }
   226  
   227  func (w *Worker) doVALIDATION(targetInfo migration.TargetInfo, modelUUID string) (migration.Phase, error) {
   228  	// TODO(mjs) - Wait for all agents to report back.
   229  
   230  	// Once all agents have validated, activate the model.
   231  	err := activateModel(targetInfo, modelUUID)
   232  	if err != nil {
   233  		return migration.ABORT, nil
   234  	}
   235  	return migration.SUCCESS, nil
   236  }
   237  
   238  func activateModel(targetInfo migration.TargetInfo, modelUUID string) error {
   239  	conn, err := openAPIConn(targetInfo)
   240  	if err != nil {
   241  		return errors.Trace(err)
   242  	}
   243  	defer conn.Close()
   244  
   245  	targetClient := migrationtarget.NewClient(conn)
   246  	err = targetClient.Activate(modelUUID)
   247  	return errors.Trace(err)
   248  }
   249  
   250  func (w *Worker) doSUCCESS() (migration.Phase, error) {
   251  	// XXX(mjs) - this is a horrible hack, which helps to ensure that
   252  	// minions will see the SUCCESS state (due to watcher event
   253  	// coalescing). It will go away soon.
   254  	time.Sleep(tempSuccessSleep)
   255  	return migration.LOGTRANSFER, nil
   256  }
   257  
// doLOGTRANSFER handles the LOGTRANSFER phase. It is currently a
// placeholder that does no work and always advances the migration to
// REAP.
func (w *Worker) doLOGTRANSFER() (migration.Phase, error) {
	// TODO(mjs) - To be implemented.
	return migration.REAP, nil
}
   262  
// doREAP handles the REAP phase. It is currently a placeholder that
// does no work and always advances the migration to DONE.
func (w *Worker) doREAP() (migration.Phase, error) {
	// TODO(mjs) - To be implemented.
	return migration.DONE, nil
}
   267  
   268  func (w *Worker) doABORT(targetInfo migration.TargetInfo, modelUUID string) (migration.Phase, error) {
   269  	if err := removeImportedModel(targetInfo, modelUUID); err != nil {
   270  		// This isn't fatal. Removing the imported model is a best
   271  		// efforts attempt.
   272  		logger.Errorf("failed to reverse model import: %v", err)
   273  	}
   274  	return migration.ABORTDONE, nil
   275  }
   276  
   277  func removeImportedModel(targetInfo migration.TargetInfo, modelUUID string) error {
   278  	conn, err := openAPIConn(targetInfo)
   279  	if err != nil {
   280  		return errors.Trace(err)
   281  	}
   282  	defer conn.Close()
   283  
   284  	targetClient := migrationtarget.NewClient(conn)
   285  	err = targetClient.Abort(modelUUID)
   286  	return errors.Trace(err)
   287  }
   288  
   289  func (w *Worker) waitForActiveMigration() (migrationmaster.MigrationStatus, error) {
   290  	var empty migrationmaster.MigrationStatus
   291  
   292  	watcher, err := w.config.Facade.Watch()
   293  	if err != nil {
   294  		return empty, errors.Annotate(err, "watching for migration")
   295  	}
   296  	if err := w.catacomb.Add(watcher); err != nil {
   297  		return empty, errors.Trace(err)
   298  	}
   299  	defer watcher.Kill()
   300  
   301  	for {
   302  		select {
   303  		case <-w.catacomb.Dying():
   304  			return empty, w.catacomb.ErrDying()
   305  		case <-watcher.Changes():
   306  		}
   307  		status, err := w.config.Facade.GetMigrationStatus()
   308  		switch {
   309  		case params.IsCodeNotFound(err):
   310  			if err := w.config.Guard.Unlock(); err != nil {
   311  				return empty, errors.Trace(err)
   312  			}
   313  			continue
   314  		case err != nil:
   315  			return empty, errors.Annotate(err, "retrieving migration status")
   316  		}
   317  		if modelHasMigrated(status.Phase) {
   318  			return empty, dependency.ErrUninstall
   319  		}
   320  		if !status.Phase.IsTerminal() {
   321  			return status, nil
   322  		}
   323  	}
   324  }
   325  
   326  func openAPIConn(targetInfo migration.TargetInfo) (api.Connection, error) {
   327  	apiInfo := &api.Info{
   328  		Addrs:    targetInfo.Addrs,
   329  		CACert:   targetInfo.CACert,
   330  		Tag:      targetInfo.AuthTag,
   331  		Password: targetInfo.Password,
   332  	}
   333  	// Use zero DialOpts (no retries) because the worker must stay
   334  	// responsive to Kill requests. We don't want it to be blocked by
   335  	// a long set of retry attempts.
   336  	return apiOpen(apiInfo, api.DialOpts{})
   337  }
   338  
   339  func modelHasMigrated(phase migration.Phase) bool {
   340  	return phase == migration.DONE || phase == migration.REAPFAILED
   341  }