github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/apicaller/connect.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package apicaller 5 6 import ( 7 "time" 8 9 "github.com/juju/errors" 10 "github.com/juju/utils" 11 12 "github.com/juju/juju/agent" 13 "github.com/juju/juju/api" 14 apiagent "github.com/juju/juju/api/agent" 15 "github.com/juju/juju/apiserver/common" 16 "github.com/juju/juju/apiserver/params" 17 ) 18 19 var ( 20 // checkProvisionedStrategy defines the evil uninterruptible 21 // retry strategy for "handling" ErrNotProvisioned. It exists 22 // in the name of stability; as the code evolves, it would be 23 // great to see its function moved up a level or two. 24 // 25 // TODO(katco): 2016-08-09: lp:1611427 26 checkProvisionedStrategy = utils.AttemptStrategy{ 27 Total: 10 * time.Minute, 28 Delay: 5 * time.Second, 29 } 30 31 // newConnFacade should similarly move up a level so it can 32 // be explicitly configured without export_test hackery 33 newConnFacade = apiagent.NewConnFacade 34 35 // errAgentEntityDead is an internal error returned by getEntity. 36 errAgentEntityDead = errors.New("agent entity is dead") 37 38 // ErrConnectImpossible indicates that we can contact an apiserver 39 // but have no hope of authenticating a connection with it. 40 ErrConnectImpossible = errors.New("connection permanently impossible") 41 42 // ErrChangedPassword indicates that the agent config used to connect 43 // has been updated with a new password, and you should try again. 44 ErrChangedPassword = errors.New("insecure password replaced; retry") 45 ) 46 47 // OnlyConnect logs into the API using the supplied agent's credentials. 48 func OnlyConnect(a agent.Agent, apiOpen api.OpenFunc) (api.Connection, error) { 49 agentConfig := a.CurrentConfig() 50 info, ok := agentConfig.APIInfo() 51 if !ok { 52 return nil, errors.New("API info not available") 53 } 54 conn, _, err := connectFallback(apiOpen, info, agentConfig.OldPassword()) 55 if err != nil { 56 return nil, errors.Trace(err) 57 } 58 return conn, nil 59 } 60 61 // connectFallback opens an API connection using the supplied info, 62 // or a copy using the fallbackPassword; blocks for up to 5 minutes 63 // if it encounters a CodeNotProvisioned error, periodically retrying; 64 // and eventually, having either succeeded, failed, or timed out, returns: 65 // 66 // * (if successful) the connection, and whether the fallback was used 67 // * (otherwise) whatever error it most recently encountered 68 // 69 // It's clear that it still has machine-agent concerns still baked in, 70 // but there's no obvious practical path to separating those entirely at 71 // the moment. 72 // 73 // (The right answer is probably to treat CodeNotProvisioned as a normal 74 // error and depend on (currently nonexistent) exponential backoff in 75 // the framework: either it'll work soon enough, or the controller will 76 // spot the error and nuke the machine anyway. No harm leaving the local 77 // agent running and occasionally polling for changes -- it won't do much 78 // until it's managed to log in, and any suicide-cutoff point we pick here 79 // will be objectively bad in some circumstances.) 80 func connectFallback( 81 apiOpen api.OpenFunc, info *api.Info, fallbackPassword string, 82 ) ( 83 conn api.Connection, didFallback bool, err error, 84 ) { 85 86 // We expect to assign to `conn`, `err`, *and* `info` in 87 // the course of this operation: wrapping this repeated 88 // atom in a func currently seems to be less treacherous 89 // than the alternatives. 90 var tryConnect = func() { 91 conn, err = apiOpen(info, api.DialOpts{}) 92 } 93 94 didFallback = info.Password == "" 95 // Try to connect, trying both the primary and fallback 96 // passwords if necessary; and update info, and remember 97 // which password we used. 98 if !didFallback { 99 logger.Debugf("connecting with current password") 100 tryConnect() 101 if params.IsCodeUnauthorized(err) || errors.Cause(err) == common.ErrBadCreds { 102 didFallback = true 103 104 } 105 } 106 if didFallback { 107 // We've perhaps used the wrong password, so 108 // try again with the fallback password. 109 infoCopy := *info 110 info = &infoCopy 111 info.Password = fallbackPassword 112 logger.Debugf("connecting with old password") 113 tryConnect() 114 } 115 116 // We might be a machine agent that's started before its 117 // provisioner has had a chance to report instance data 118 // to the machine; wait a fair while to ensure we really 119 // are in the (expected rare) provisioner-crash situation 120 // that would cause permanent CodeNotProvisioned (which 121 // indicates that the controller has forgotten about us, 122 // and is provisioning a new instance, so we really should 123 // uninstall). 124 // 125 // Yes, it's dumb that this can't be interrupted, and that 126 // it's not configurable without patching. 127 if params.IsCodeNotProvisioned(err) { 128 for a := checkProvisionedStrategy.Start(); a.Next(); { 129 tryConnect() 130 if !params.IsCodeNotProvisioned(err) { 131 break 132 } 133 } 134 } 135 136 // At this point we've run out of reasons to retry connecting, 137 // and just go with whatever error we last saw (if any). 138 if err != nil { 139 logger.Debugf("failed to connect") 140 return nil, false, errors.Trace(err) 141 } 142 logger.Debugf("connected") 143 return conn, didFallback, nil 144 } 145 146 // ScaryConnect logs into the API using the supplied agent's credentials, 147 // like OnlyConnect; and then: 148 // 149 // * returns ErrConnectImpossible if the agent entity is dead or 150 // unauthorized for all known passwords; 151 // * replaces insecure credentials with freshly (locally) generated ones 152 // (and returns ErrPasswordChanged, expecting to be reinvoked); 153 // * unconditionally resets the remote-state password to its current value 154 // (for what seems like a bad reason). 155 // 156 // This is clearly a mess but at least now it's a documented and localized 157 // mess; it should be used only when making the primary API connection for 158 // a machine or unit agent running in its own process. 159 func ScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (_ api.Connection, err error) { 160 agentConfig := a.CurrentConfig() 161 info, ok := agentConfig.APIInfo() 162 if !ok { 163 return nil, errors.New("API info not available") 164 } 165 oldPassword := agentConfig.OldPassword() 166 167 defer func() { 168 cause := errors.Cause(err) 169 switch { 170 case cause == apiagent.ErrDenied: 171 case cause == errAgentEntityDead: 172 case params.IsCodeUnauthorized(cause): 173 case params.IsCodeNotProvisioned(cause): 174 default: 175 return 176 } 177 err = ErrConnectImpossible 178 }() 179 180 // Start connection... 181 conn, usedOldPassword, err := connectFallback(apiOpen, info, oldPassword) 182 if err != nil { 183 return nil, errors.Trace(err) 184 } 185 186 // ...and make sure we close it if anything goes wrong. 187 defer func() { 188 if err != nil { 189 if err := conn.Close(); err != nil { 190 logger.Errorf("while closing API connection: %v", err) 191 } 192 } 193 }() 194 195 // newConnFacade is patched out in export_test, because exhaustion. 196 // proper config/params struct would be better. 197 facade, err := newConnFacade(conn) 198 if err != nil { 199 return nil, errors.Trace(err) 200 } 201 202 // First of all, see if we're dead or removed, which will render 203 // any further work pointless. 204 entity := agentConfig.Tag() 205 life, err := facade.Life(entity) 206 if err != nil { 207 return nil, errors.Trace(err) 208 } 209 switch life { 210 case apiagent.Alive, apiagent.Dying: 211 case apiagent.Dead: 212 return nil, errAgentEntityDead 213 default: 214 return nil, errors.Errorf("unknown life value %q", life) 215 } 216 217 // If we need to change the password, it's far cleaner to 218 // exit with ErrChangedPassword and depend on the framework 219 // for expeditious retry than it is to mess around with those 220 // responsibilities in here. 221 if usedOldPassword { 222 logger.Debugf("changing password...") 223 err := changePassword(oldPassword, a, facade) 224 if err != nil { 225 return nil, errors.Trace(err) 226 } 227 logger.Debugf("password changed") 228 return nil, ErrChangedPassword 229 } 230 231 // If we *didn't* need to change the password, we apparently need 232 // to reset our password to its current value anyway. Reportedly, 233 // a machine agent promoted to controller status might have bad 234 // auth data in mongodb, and this "fixes" it... but this is scary, 235 // wrong, coincidental duct tape. The RTTD is to make controller- 236 // promotion work correctly in the first place. 237 // 238 // Still, can't fix everything at once. 239 if err := facade.SetPassword(entity, info.Password); err != nil { 240 return nil, errors.Annotate(err, "can't reset agent password") 241 } 242 return conn, nil 243 } 244 245 // changePassword generates a new random password and records it in 246 // local agent configuration and on the remote state server. The supplied 247 // oldPassword -- which must be the current valid password -- is set as a 248 // fallback in local config, in case we fail to update the remote password. 249 func changePassword(oldPassword string, a agent.Agent, facade apiagent.ConnFacade) error { 250 newPassword, err := utils.RandomPassword() 251 if err != nil { 252 return errors.Trace(err) 253 } 254 if err := a.ChangeConfig(func(c agent.ConfigSetter) error { 255 c.SetPassword(newPassword) 256 c.SetOldPassword(oldPassword) 257 return nil 258 }); err != nil { 259 return err 260 } 261 // This has to happen *after* we record the old/new passwords 262 // locally, lest we change it remotely, crash suddenly, and 263 // end up locked out forever. 264 return facade.SetPassword(a.CurrentConfig().Tag(), newPassword) 265 }