github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/worker/apicaller/connect.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package apicaller
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/utils"
    11  
    12  	"github.com/juju/juju/agent"
    13  	"github.com/juju/juju/api"
    14  	apiagent "github.com/juju/juju/api/agent"
    15  	"github.com/juju/juju/apiserver/params"
    16  )
    17  
    18  var (
    19  	// checkProvisionedStrategy defines the evil uninterruptible
    20  	// retry strategy for "handling" ErrNotProvisioned. It exists
    21  	// in the name of stability; as the code evolves, it would be
    22  	// great to see its function moved up a level or two.
    23  	checkProvisionedStrategy = utils.AttemptStrategy{
    24  		Total: 1 * time.Minute,
    25  		Delay: 5 * time.Second,
    26  	}
    27  
    28  	// newConnFacade should similarly move up a level so it can
    29  	// be explicitly configured without export_test hackery
    30  	newConnFacade = apiagent.NewConnFacade
    31  
    32  	// errAgentEntityDead is an internal error returned by getEntity.
    33  	errAgentEntityDead = errors.New("agent entity is dead")
    34  
    35  	// ErrConnectImpossible indicates that we can contact an apiserver
    36  	// but have no hope of authenticating a connection with it.
    37  	ErrConnectImpossible = errors.New("connection permanently impossible")
    38  
    39  	// ErrChangedPassword indicates that the agent config used to connect
    40  	// has been updated with a new password, and you should try again.
    41  	ErrChangedPassword = errors.New("insecure password replaced; retry")
    42  )
    43  
    44  // APIOpen is an api.OpenFunc that wraps api.Open, and handles the edge
    45  // case where a model has jumping several versions and doesn't yet have
    46  // the model UUID cached in the agent config; in which case we fall back
    47  // to login version 1.
    48  //
    49  // You probably want to use this in ManifoldConfig; *we* probably want to
    50  // put this particular hack inside api.Open, but I seem to recall there
    51  // being some complication last time I thought that was a good idea.
    52  func APIOpen(info *api.Info, opts api.DialOpts) (api.Connection, error) {
    53  	if info.ModelTag.Id() == "" {
    54  		return api.OpenWithVersion(info, opts, 1)
    55  	}
    56  	return api.Open(info, opts)
    57  }
    58  
    59  // OnlyConnect logs into the API using the supplied agent's credentials.
    60  func OnlyConnect(a agent.Agent, apiOpen api.OpenFunc) (api.Connection, error) {
    61  	agentConfig := a.CurrentConfig()
    62  	info, ok := agentConfig.APIInfo()
    63  	if !ok {
    64  		return nil, errors.New("API info not available")
    65  	}
    66  	conn, _, err := connectFallback(apiOpen, info, agentConfig.OldPassword())
    67  	if err != nil {
    68  		return nil, errors.Trace(err)
    69  	}
    70  	return conn, nil
    71  }
    72  
    73  // connectFallback opens an API connection using the supplied info,
    74  // or a copy using the fallbackPassword; blocks for up to 5 minutes
    75  // if it encounters a CodeNotProvisioned error, periodically retrying;
    76  // and eventually, having either succeeded, failed, or timed out, returns:
    77  //
    78  //   * (if successful) the connection, and whether the fallback was used
    79  //   * (otherwise) whatever error it most recently encountered
    80  //
    81  // It's clear that it still has machine-agent concerns still baked in,
    82  // but there's no obvious practical path to separating those entirely at
    83  // the moment.
    84  //
    85  // (The right answer is probably to treat CodeNotProvisioned as a normal
    86  // error and depend on (currently nonexistent) exponential backoff in
    87  // the framework: either it'll work soon enough, or the controller will
    88  // spot the error and nuke the machine anyway. No harm leaving the local
    89  // agent running and occasionally polling for changes -- it won't do much
    90  // until it's managed to log in, and any suicide-cutoff point we pick here
    91  // will be objectively bad in some circumstances.)
    92  func connectFallback(
    93  	apiOpen api.OpenFunc, info *api.Info, fallbackPassword string,
    94  ) (
    95  	conn api.Connection, didFallback bool, err error,
    96  ) {
    97  
    98  	// We expect to assign to `conn`, `err`, *and* `info` in
    99  	// the course of this operation: wrapping this repeated
   100  	// atom in a func currently seems to be less treacherous
   101  	// than the alternatives.
   102  	var tryConnect = func() {
   103  		conn, err = apiOpen(info, api.DialOpts{})
   104  	}
   105  
   106  	// Try to connect, trying both the primary and fallback
   107  	// passwords if necessary; and update info, and remember
   108  	// which password we used.
   109  	tryConnect()
   110  	if params.IsCodeUnauthorized(err) {
   111  		// We've perhaps used the wrong password, so
   112  		// try again with the fallback password.
   113  		infoCopy := *info
   114  		info = &infoCopy
   115  		info.Password = fallbackPassword
   116  		didFallback = true
   117  		tryConnect()
   118  	}
   119  
   120  	// We might be a machine agent that's started before its
   121  	// provisioner has had a chance to report instance data
   122  	// to the machine; wait a fair while to ensure we really
   123  	// are in the (expected rare) provisioner-crash situation
   124  	// that would cause permanent CodeNotProvisioned (which
   125  	// indicates that the controller has forgotten about us,
   126  	// and is provisioning a new instance, so we really should
   127  	// uninstall).
   128  	//
   129  	// Yes, it's dumb that this can't be interrupted, and that
   130  	// it's not configurable without patching.
   131  	if params.IsCodeNotProvisioned(err) {
   132  		for a := checkProvisionedStrategy.Start(); a.Next(); {
   133  			tryConnect()
   134  			if !params.IsCodeNotProvisioned(err) {
   135  				break
   136  			}
   137  		}
   138  	}
   139  
   140  	// At this point we've run out of reasons to retry connecting,
   141  	// and just go with whatever error we last saw (if any).
   142  	if err != nil {
   143  		return nil, false, errors.Trace(err)
   144  	}
   145  	return conn, didFallback, nil
   146  }
   147  
   148  // ScaryConnect logs into the API using the supplied agent's credentials,
   149  // like OnlyConnect; and then:
   150  //
   151  //   * returns ErrConnectImpossible if the agent entity is dead or
   152  //     unauthorized for all known passwords;
   153  //   * if the agent's config does not specify a model, tries to record the
   154  //     model we just connected to;
   155  //   * replaces insecure credentials with freshly (locally) generated ones
   156  //     (and returns ErrPasswordChanged, expecting to be reinvoked);
   157  //   * unconditionally resets the remote-state password to its current value
   158  //     (for what seems like a bad reason).
   159  //
   160  // This is clearly a mess but at least now it's a documented and localized
   161  // mess; it should be used only when making the primary API connection for
   162  // a machine or unit agent running in its own process.
   163  func ScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (_ api.Connection, err error) {
   164  	agentConfig := a.CurrentConfig()
   165  	info, ok := agentConfig.APIInfo()
   166  	if !ok {
   167  		return nil, errors.New("API info not available")
   168  	}
   169  	oldPassword := agentConfig.OldPassword()
   170  
   171  	defer func() {
   172  		cause := errors.Cause(err)
   173  		switch {
   174  		case cause == apiagent.ErrDenied:
   175  		case cause == errAgentEntityDead:
   176  		case params.IsCodeUnauthorized(cause):
   177  		case params.IsCodeNotProvisioned(cause):
   178  		default:
   179  			return
   180  		}
   181  		err = ErrConnectImpossible
   182  	}()
   183  
   184  	// Start connection...
   185  	conn, usedOldPassword, err := connectFallback(apiOpen, info, oldPassword)
   186  	if err != nil {
   187  		return nil, errors.Trace(err)
   188  	}
   189  
   190  	// ...and make sure we close it if anything goes wrong.
   191  	defer func() {
   192  		if err != nil {
   193  			if err := conn.Close(); err != nil {
   194  				logger.Errorf("while closing API connection: %v", err)
   195  			}
   196  		}
   197  	}()
   198  
   199  	// Update the agent config if necessary; this should just read the
   200  	// conn's properties, rather than making api calls, so we don't
   201  	// need to think about facades yet.
   202  	maybeSetAgentModelTag(a, conn)
   203  
   204  	// newConnFacade is patched out in export_test, because exhaustion.
   205  	// proper config/params struct would be better.
   206  	facade, err := newConnFacade(conn)
   207  	if err != nil {
   208  		return nil, errors.Trace(err)
   209  	}
   210  
   211  	// First of all, see if we're dead or removed, which will render
   212  	// any further work pointless.
   213  	entity := agentConfig.Tag()
   214  	life, err := facade.Life(entity)
   215  	if err != nil {
   216  		return nil, errors.Trace(err)
   217  	}
   218  	switch life {
   219  	case apiagent.Alive, apiagent.Dying:
   220  	case apiagent.Dead:
   221  		return nil, errAgentEntityDead
   222  	default:
   223  		return nil, errors.Errorf("unknown life value %q", life)
   224  	}
   225  
   226  	// If we need to change the password, it's far cleaner to
   227  	// exit with ErrChangedPassword and depend on the framework
   228  	// for expeditious retry than it is to mess around with those
   229  	// responsibilities in here.
   230  	if usedOldPassword {
   231  		logger.Debugf("changing password...")
   232  		err := changePassword(oldPassword, a, facade)
   233  		if err != nil {
   234  			return nil, errors.Trace(err)
   235  		}
   236  		logger.Debugf("password changed")
   237  		return nil, ErrChangedPassword
   238  	}
   239  
   240  	// If we *didn't* need to change the password, we apparently need
   241  	// to reset our password to its current value anyway. Reportedly,
   242  	// a machine agent promoted to controller status might have bad
   243  	// auth data in mongodb, and this "fixes" it... but this is scary,
   244  	// wrong, coincidental duct tape. The RTTD is to make controller-
   245  	// promotion work correctly in the first place.
   246  	//
   247  	// Still, can't fix everything at once.
   248  	if err := facade.SetPassword(entity, info.Password); err != nil {
   249  		return nil, errors.Annotate(err, "can't reset agent password")
   250  	}
   251  	return conn, nil
   252  }
   253  
   254  // maybeSetAgentModelTag tries to update the agent configuration if
   255  // it's missing a model tag. It doesn't *really* matter if it fails,
   256  // because we can demonstrably connect without it, so we log any
   257  // errors encountered and never return any to the client.
   258  func maybeSetAgentModelTag(a agent.Agent, conn api.Connection) {
   259  	if a.CurrentConfig().Model().Id() == "" {
   260  		err := a.ChangeConfig(func(setter agent.ConfigSetter) error {
   261  			modelTag, err := conn.ModelTag()
   262  			if err != nil {
   263  				return errors.Annotate(err, "no model uuid set on api")
   264  			}
   265  			return setter.Migrate(agent.MigrateParams{
   266  				Model: modelTag,
   267  			})
   268  		})
   269  		if err != nil {
   270  			logger.Warningf("unable to save model uuid: %v", err)
   271  			// Not really fatal, just annoying.
   272  		}
   273  	}
   274  }
   275  
   276  // changePassword generates a new random password and records it in
   277  // local agent configuration and on the remote state server. The supplied
   278  // oldPassword -- which must be the current valid password -- is set as a
   279  // fallback in local config, in case we fail to update the remote password.
   280  func changePassword(oldPassword string, a agent.Agent, facade apiagent.ConnFacade) error {
   281  	newPassword, err := utils.RandomPassword()
   282  	if err != nil {
   283  		return errors.Trace(err)
   284  	}
   285  	if err := a.ChangeConfig(func(c agent.ConfigSetter) error {
   286  		c.SetPassword(newPassword)
   287  		c.SetOldPassword(oldPassword)
   288  		return nil
   289  	}); err != nil {
   290  		return err
   291  	}
   292  	// This has to happen *after* we record the old/new passwords
   293  	// locally, lest we change it remotely, crash suddenly, and
   294  	// end up locked out forever.
   295  	return facade.SetPassword(a.CurrentConfig().Tag(), newPassword)
   296  }