github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/apicaller/connect.go (about)

     1  // Copyright 2012-2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package apicaller
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/utils"
    11  
    12  	"github.com/juju/juju/agent"
    13  	"github.com/juju/juju/api"
    14  	apiagent "github.com/juju/juju/api/agent"
    15  	"github.com/juju/juju/apiserver/common"
    16  	"github.com/juju/juju/apiserver/params"
    17  )
    18  
    19  var (
    20  	// checkProvisionedStrategy defines the evil uninterruptible
    21  	// retry strategy for "handling" ErrNotProvisioned. It exists
    22  	// in the name of stability; as the code evolves, it would be
    23  	// great to see its function moved up a level or two.
    24  	//
    25  	// TODO(katco): 2016-08-09: lp:1611427
    26  	checkProvisionedStrategy = utils.AttemptStrategy{
    27  		Total: 10 * time.Minute,
    28  		Delay: 5 * time.Second,
    29  	}
    30  
    31  	// newConnFacade should similarly move up a level so it can
    32  	// be explicitly configured without export_test hackery
    33  	newConnFacade = apiagent.NewConnFacade
    34  
    35  	// errAgentEntityDead is an internal error returned by getEntity.
    36  	errAgentEntityDead = errors.New("agent entity is dead")
    37  
    38  	// ErrConnectImpossible indicates that we can contact an apiserver
    39  	// but have no hope of authenticating a connection with it.
    40  	ErrConnectImpossible = errors.New("connection permanently impossible")
    41  
    42  	// ErrChangedPassword indicates that the agent config used to connect
    43  	// has been updated with a new password, and you should try again.
    44  	ErrChangedPassword = errors.New("insecure password replaced; retry")
    45  )
    46  
    47  // OnlyConnect logs into the API using the supplied agent's credentials.
    48  func OnlyConnect(a agent.Agent, apiOpen api.OpenFunc) (api.Connection, error) {
    49  	agentConfig := a.CurrentConfig()
    50  	info, ok := agentConfig.APIInfo()
    51  	if !ok {
    52  		return nil, errors.New("API info not available")
    53  	}
    54  	conn, _, err := connectFallback(apiOpen, info, agentConfig.OldPassword())
    55  	if err != nil {
    56  		return nil, errors.Trace(err)
    57  	}
    58  	return conn, nil
    59  }
    60  
    61  // connectFallback opens an API connection using the supplied info,
    62  // or a copy using the fallbackPassword; blocks for up to 5 minutes
    63  // if it encounters a CodeNotProvisioned error, periodically retrying;
    64  // and eventually, having either succeeded, failed, or timed out, returns:
    65  //
    66  //   * (if successful) the connection, and whether the fallback was used
    67  //   * (otherwise) whatever error it most recently encountered
    68  //
    69  // It's clear that it still has machine-agent concerns still baked in,
    70  // but there's no obvious practical path to separating those entirely at
    71  // the moment.
    72  //
    73  // (The right answer is probably to treat CodeNotProvisioned as a normal
    74  // error and depend on (currently nonexistent) exponential backoff in
    75  // the framework: either it'll work soon enough, or the controller will
    76  // spot the error and nuke the machine anyway. No harm leaving the local
    77  // agent running and occasionally polling for changes -- it won't do much
    78  // until it's managed to log in, and any suicide-cutoff point we pick here
    79  // will be objectively bad in some circumstances.)
    80  func connectFallback(
    81  	apiOpen api.OpenFunc, info *api.Info, fallbackPassword string,
    82  ) (
    83  	conn api.Connection, didFallback bool, err error,
    84  ) {
    85  
    86  	// We expect to assign to `conn`, `err`, *and* `info` in
    87  	// the course of this operation: wrapping this repeated
    88  	// atom in a func currently seems to be less treacherous
    89  	// than the alternatives.
    90  	var tryConnect = func() {
    91  		conn, err = apiOpen(info, api.DialOpts{})
    92  	}
    93  
    94  	didFallback = info.Password == ""
    95  	// Try to connect, trying both the primary and fallback
    96  	// passwords if necessary; and update info, and remember
    97  	// which password we used.
    98  	if !didFallback {
    99  		logger.Debugf("connecting with current password")
   100  		tryConnect()
   101  		if params.IsCodeUnauthorized(err) || errors.Cause(err) == common.ErrBadCreds {
   102  			didFallback = true
   103  
   104  		}
   105  	}
   106  	if didFallback {
   107  		// We've perhaps used the wrong password, so
   108  		// try again with the fallback password.
   109  		infoCopy := *info
   110  		info = &infoCopy
   111  		info.Password = fallbackPassword
   112  		logger.Debugf("connecting with old password")
   113  		tryConnect()
   114  	}
   115  
   116  	// We might be a machine agent that's started before its
   117  	// provisioner has had a chance to report instance data
   118  	// to the machine; wait a fair while to ensure we really
   119  	// are in the (expected rare) provisioner-crash situation
   120  	// that would cause permanent CodeNotProvisioned (which
   121  	// indicates that the controller has forgotten about us,
   122  	// and is provisioning a new instance, so we really should
   123  	// uninstall).
   124  	//
   125  	// Yes, it's dumb that this can't be interrupted, and that
   126  	// it's not configurable without patching.
   127  	if params.IsCodeNotProvisioned(err) {
   128  		for a := checkProvisionedStrategy.Start(); a.Next(); {
   129  			tryConnect()
   130  			if !params.IsCodeNotProvisioned(err) {
   131  				break
   132  			}
   133  		}
   134  	}
   135  
   136  	// At this point we've run out of reasons to retry connecting,
   137  	// and just go with whatever error we last saw (if any).
   138  	if err != nil {
   139  		logger.Debugf("failed to connect")
   140  		return nil, false, errors.Trace(err)
   141  	}
   142  	logger.Debugf("connected")
   143  	return conn, didFallback, nil
   144  }
   145  
   146  // ScaryConnect logs into the API using the supplied agent's credentials,
   147  // like OnlyConnect; and then:
   148  //
   149  //   * returns ErrConnectImpossible if the agent entity is dead or
   150  //     unauthorized for all known passwords;
   151  //   * replaces insecure credentials with freshly (locally) generated ones
   152  //     (and returns ErrPasswordChanged, expecting to be reinvoked);
   153  //   * unconditionally resets the remote-state password to its current value
   154  //     (for what seems like a bad reason).
   155  //
   156  // This is clearly a mess but at least now it's a documented and localized
   157  // mess; it should be used only when making the primary API connection for
   158  // a machine or unit agent running in its own process.
   159  func ScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (_ api.Connection, err error) {
   160  	agentConfig := a.CurrentConfig()
   161  	info, ok := agentConfig.APIInfo()
   162  	if !ok {
   163  		return nil, errors.New("API info not available")
   164  	}
   165  	oldPassword := agentConfig.OldPassword()
   166  
   167  	defer func() {
   168  		cause := errors.Cause(err)
   169  		switch {
   170  		case cause == apiagent.ErrDenied:
   171  		case cause == errAgentEntityDead:
   172  		case params.IsCodeUnauthorized(cause):
   173  		case params.IsCodeNotProvisioned(cause):
   174  		default:
   175  			return
   176  		}
   177  		err = ErrConnectImpossible
   178  	}()
   179  
   180  	// Start connection...
   181  	conn, usedOldPassword, err := connectFallback(apiOpen, info, oldPassword)
   182  	if err != nil {
   183  		return nil, errors.Trace(err)
   184  	}
   185  
   186  	// ...and make sure we close it if anything goes wrong.
   187  	defer func() {
   188  		if err != nil {
   189  			if err := conn.Close(); err != nil {
   190  				logger.Errorf("while closing API connection: %v", err)
   191  			}
   192  		}
   193  	}()
   194  
   195  	// newConnFacade is patched out in export_test, because exhaustion.
   196  	// proper config/params struct would be better.
   197  	facade, err := newConnFacade(conn)
   198  	if err != nil {
   199  		return nil, errors.Trace(err)
   200  	}
   201  
   202  	// First of all, see if we're dead or removed, which will render
   203  	// any further work pointless.
   204  	entity := agentConfig.Tag()
   205  	life, err := facade.Life(entity)
   206  	if err != nil {
   207  		return nil, errors.Trace(err)
   208  	}
   209  	switch life {
   210  	case apiagent.Alive, apiagent.Dying:
   211  	case apiagent.Dead:
   212  		return nil, errAgentEntityDead
   213  	default:
   214  		return nil, errors.Errorf("unknown life value %q", life)
   215  	}
   216  
   217  	// If we need to change the password, it's far cleaner to
   218  	// exit with ErrChangedPassword and depend on the framework
   219  	// for expeditious retry than it is to mess around with those
   220  	// responsibilities in here.
   221  	if usedOldPassword {
   222  		logger.Debugf("changing password...")
   223  		err := changePassword(oldPassword, a, facade)
   224  		if err != nil {
   225  			return nil, errors.Trace(err)
   226  		}
   227  		logger.Debugf("password changed")
   228  		return nil, ErrChangedPassword
   229  	}
   230  
   231  	// If we *didn't* need to change the password, we apparently need
   232  	// to reset our password to its current value anyway. Reportedly,
   233  	// a machine agent promoted to controller status might have bad
   234  	// auth data in mongodb, and this "fixes" it... but this is scary,
   235  	// wrong, coincidental duct tape. The RTTD is to make controller-
   236  	// promotion work correctly in the first place.
   237  	//
   238  	// Still, can't fix everything at once.
   239  	if err := facade.SetPassword(entity, info.Password); err != nil {
   240  		return nil, errors.Annotate(err, "can't reset agent password")
   241  	}
   242  	return conn, nil
   243  }
   244  
   245  // changePassword generates a new random password and records it in
   246  // local agent configuration and on the remote state server. The supplied
   247  // oldPassword -- which must be the current valid password -- is set as a
   248  // fallback in local config, in case we fail to update the remote password.
   249  func changePassword(oldPassword string, a agent.Agent, facade apiagent.ConnFacade) error {
   250  	newPassword, err := utils.RandomPassword()
   251  	if err != nil {
   252  		return errors.Trace(err)
   253  	}
   254  	if err := a.ChangeConfig(func(c agent.ConfigSetter) error {
   255  		c.SetPassword(newPassword)
   256  		c.SetOldPassword(oldPassword)
   257  		return nil
   258  	}); err != nil {
   259  		return err
   260  	}
   261  	// This has to happen *after* we record the old/new passwords
   262  	// locally, lest we change it remotely, crash suddenly, and
   263  	// end up locked out forever.
   264  	return facade.SetPassword(a.CurrentConfig().Tag(), newPassword)
   265  }