github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/apicaller/connect.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package apicaller 5 6 import ( 7 "time" 8 9 "github.com/juju/errors" 10 "github.com/juju/utils" 11 "gopkg.in/juju/names.v2" 12 13 "github.com/juju/juju/agent" 14 "github.com/juju/juju/api" 15 apiagent "github.com/juju/juju/api/agent" 16 "github.com/juju/juju/apiserver/common" 17 "github.com/juju/juju/apiserver/params" 18 ) 19 20 var ( 21 // checkProvisionedStrategy defines the evil uninterruptible 22 // retry strategy for "handling" ErrNotProvisioned. It exists 23 // in the name of stability; as the code evolves, it would be 24 // great to see its function moved up a level or two. 25 // 26 // TODO(katco): 2016-08-09: lp:1611427 27 checkProvisionedStrategy = utils.AttemptStrategy{ 28 Total: 10 * time.Minute, 29 Delay: 5 * time.Second, 30 } 31 32 // newConnFacade should similarly move up a level so it can 33 // be explicitly configured without export_test hackery 34 newConnFacade = apiagent.NewConnFacade 35 36 // errAgentEntityDead is an internal error returned by getEntity. 37 errAgentEntityDead = errors.New("agent entity is dead") 38 39 // ErrConnectImpossible indicates that we can contact an apiserver 40 // but have no hope of authenticating a connection with it. 41 ErrConnectImpossible = errors.New("connection permanently impossible") 42 43 // ErrChangedPassword indicates that the agent config used to connect 44 // has been updated with a new password, and you should try again. 45 ErrChangedPassword = errors.New("insecure password replaced; retry") 46 ) 47 48 // OnlyConnect logs into the API using the supplied agent's credentials. 49 func OnlyConnect(a agent.Agent, apiOpen api.OpenFunc) (api.Connection, error) { 50 agentConfig := a.CurrentConfig() 51 info, ok := agentConfig.APIInfo() 52 if !ok { 53 return nil, errors.New("API info not available") 54 } 55 conn, _, err := connectFallback(apiOpen, info, agentConfig.OldPassword()) 56 if err != nil { 57 return nil, errors.Trace(err) 58 } 59 return conn, nil 60 } 61 62 // connectFallback opens an API connection using the supplied info, 63 // or a copy using the fallbackPassword; blocks for up to 5 minutes 64 // if it encounters a CodeNotProvisioned error, periodically retrying; 65 // and eventually, having either succeeded, failed, or timed out, returns: 66 // 67 // * (if successful) the connection, and whether the fallback was used 68 // * (otherwise) whatever error it most recently encountered 69 // 70 // It's clear that it still has machine-agent concerns still baked in, 71 // but there's no obvious practical path to separating those entirely at 72 // the moment. 73 // 74 // (The right answer is probably to treat CodeNotProvisioned as a normal 75 // error and depend on (currently nonexistent) exponential backoff in 76 // the framework: either it'll work soon enough, or the controller will 77 // spot the error and nuke the machine anyway. No harm leaving the local 78 // agent running and occasionally polling for changes -- it won't do much 79 // until it's managed to log in, and any suicide-cutoff point we pick here 80 // will be objectively bad in some circumstances.) 81 func connectFallback( 82 apiOpen api.OpenFunc, info *api.Info, fallbackPassword string, 83 ) ( 84 conn api.Connection, didFallback bool, err error, 85 ) { 86 // We expect to assign to `conn`, `err`, *and* `info` in 87 // the course of this operation: wrapping this repeated 88 // atom in a func currently seems to be less treacherous 89 // than the alternatives. 90 var tryConnect = func() { 91 conn, err = apiOpen(info, api.DialOpts{ 92 // The DialTimeout is for connecting to the underlying 93 // socket. We use three seconds because it should be fast 94 // but it is possible to add a manual machine to a distant 95 // controller such that the round trip time could be as high 96 // as 500ms. 97 DialTimeout: 3 * time.Second, 98 // The delay between connecting to a different controller. Setting this to 0 means we try all controllers 99 // simultaneously. We set it to approximately how long the TLS handshake takes, to avoid doing TLS 100 // handshakes to a controller that we are going to end up ignoring. 101 DialAddressInterval: 200 * time.Millisecond, 102 // The timeout is for the complete login handshake. 103 // If the server is rate limiting, it will normally pause 104 // before responding to the login request, but the pause is 105 // in the realm of five to ten seconds. 106 Timeout: time.Minute, 107 }) 108 } 109 110 didFallback = info.Password == "" 111 // Try to connect, trying both the primary and fallback 112 // passwords if necessary; and update info, and remember 113 // which password we used. 114 if !didFallback { 115 logger.Debugf("connecting with current password") 116 tryConnect() 117 if params.IsCodeUnauthorized(err) || errors.Cause(err) == common.ErrBadCreds { 118 didFallback = true 119 120 } 121 } 122 if didFallback { 123 // We've perhaps used the wrong password, so 124 // try again with the fallback password. 125 infoCopy := *info 126 info = &infoCopy 127 info.Password = fallbackPassword 128 logger.Debugf("connecting with old password") 129 tryConnect() 130 } 131 132 // We might be a machine agent that's started before its 133 // provisioner has had a chance to report instance data 134 // to the machine; wait a fair while to ensure we really 135 // are in the (expected rare) provisioner-crash situation 136 // that would cause permanent CodeNotProvisioned (which 137 // indicates that the controller has forgotten about us, 138 // and is provisioning a new instance, so we really should 139 // uninstall). 140 // 141 // Yes, it's dumb that this can't be interrupted, and that 142 // it's not configurable without patching. 143 if params.IsCodeNotProvisioned(err) { 144 for a := checkProvisionedStrategy.Start(); a.Next(); { 145 tryConnect() 146 if !params.IsCodeNotProvisioned(err) { 147 break 148 } 149 } 150 } 151 152 // At this point we've run out of reasons to retry connecting, 153 // and just go with whatever error we last saw (if any). 154 if err != nil { 155 logger.Debugf("[%s] failed to connect", shortModelUUID(info.ModelTag)) 156 return nil, false, errors.Trace(err) 157 } 158 logger.Infof("[%s] %q successfully connected to %q", 159 shortModelUUID(info.ModelTag), 160 info.Tag.String(), 161 conn.Addr()) 162 return conn, didFallback, nil 163 } 164 165 func shortModelUUID(model names.ModelTag) string { 166 uuid := model.Id() 167 if len(uuid) > 6 { 168 return uuid[:6] 169 } 170 return uuid 171 } 172 173 // ScaryConnect logs into the API using the supplied agent's credentials, 174 // like OnlyConnect; and then: 175 // 176 // * returns ErrConnectImpossible if the agent entity is dead or 177 // unauthorized for all known passwords; 178 // * replaces insecure credentials with freshly (locally) generated ones 179 // (and returns ErrPasswordChanged, expecting to be reinvoked); 180 // * unconditionally resets the remote-state password to its current value 181 // (for what seems like a bad reason). 182 // 183 // This is clearly a mess but at least now it's a documented and localized 184 // mess; it should be used only when making the primary API connection for 185 // a machine or unit agent running in its own process. 186 func ScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (_ api.Connection, err error) { 187 agentConfig := a.CurrentConfig() 188 info, ok := agentConfig.APIInfo() 189 if !ok { 190 return nil, errors.New("API info not available") 191 } 192 oldPassword := agentConfig.OldPassword() 193 194 defer func() { 195 cause := errors.Cause(err) 196 switch { 197 case cause == apiagent.ErrDenied: 198 case cause == errAgentEntityDead: 199 case params.IsCodeUnauthorized(cause): 200 case params.IsCodeNotProvisioned(cause): 201 default: 202 return 203 } 204 err = ErrConnectImpossible 205 }() 206 207 // Start connection... 208 conn, usedOldPassword, err := connectFallback(apiOpen, info, oldPassword) 209 if err != nil { 210 return nil, errors.Trace(err) 211 } 212 213 // ...and make sure we close it if anything goes wrong. 214 defer func() { 215 if err != nil { 216 if err := conn.Close(); err != nil { 217 logger.Errorf("while closing API connection: %v", err) 218 } 219 } 220 }() 221 222 // newConnFacade is patched out in export_test, because exhaustion. 223 // proper config/params struct would be better. 224 facade, err := newConnFacade(conn) 225 if err != nil { 226 return nil, errors.Trace(err) 227 } 228 229 // First of all, see if we're dead or removed, which will render 230 // any further work pointless. 231 entity := agentConfig.Tag() 232 life, err := facade.Life(entity) 233 if err != nil { 234 return nil, errors.Trace(err) 235 } 236 switch life { 237 case apiagent.Alive, apiagent.Dying: 238 case apiagent.Dead: 239 return nil, errAgentEntityDead 240 default: 241 return nil, errors.Errorf("unknown life value %q", life) 242 } 243 244 // If we need to change the password, it's far cleaner to 245 // exit with ErrChangedPassword and depend on the framework 246 // for expeditious retry than it is to mess around with those 247 // responsibilities in here. 248 if usedOldPassword { 249 logger.Debugf("changing password...") 250 err := changePassword(oldPassword, a, facade) 251 if err != nil { 252 return nil, errors.Trace(err) 253 } 254 logger.Infof("[%s] password changed for %q", 255 shortModelUUID(agentConfig.Model()), entity.String()) 256 return nil, ErrChangedPassword 257 } 258 259 // If we *didn't* need to change the password, we apparently need 260 // to reset our password to its current value anyway. Reportedly, 261 // a machine agent promoted to controller status might have bad 262 // auth data in mongodb, and this "fixes" it... but this is scary, 263 // wrong, coincidental duct tape. The RTTD is to make controller- 264 // promotion work correctly in the first place. 265 // 266 // Still, can't fix everything at once. 267 if err := facade.SetPassword(entity, info.Password); err != nil { 268 return nil, errors.Annotate(err, "can't reset agent password") 269 } 270 return conn, nil 271 } 272 273 // changePassword generates a new random password and records it in 274 // local agent configuration and on the remote state server. The supplied 275 // oldPassword -- which must be the current valid password -- is set as a 276 // fallback in local config, in case we fail to update the remote password. 277 func changePassword(oldPassword string, a agent.Agent, facade apiagent.ConnFacade) error { 278 newPassword, err := utils.RandomPassword() 279 if err != nil { 280 return errors.Trace(err) 281 } 282 if err := a.ChangeConfig(func(c agent.ConfigSetter) error { 283 c.SetPassword(newPassword) 284 c.SetOldPassword(oldPassword) 285 return nil 286 }); err != nil { 287 return err 288 } 289 // This has to happen *after* we record the old/new passwords 290 // locally, lest we change it remotely, crash suddenly, and 291 // end up locked out forever. 292 return facade.SetPassword(a.CurrentConfig().Tag(), newPassword) 293 } 294 295 // NewExternalControllerConnectionFunc returns a function returning an 296 // api connection to a controller with the specified api info. 297 type NewExternalControllerConnectionFunc func(*api.Info) (api.Connection, error) 298 299 // NewExternalControllerConnection returns an api connection to a controller 300 // with the specified api info. 301 func NewExternalControllerConnection(apiInfo *api.Info) (api.Connection, error) { 302 return api.Open(apiInfo, api.DialOpts{ 303 Timeout: 2 * time.Second, 304 RetryDelay: 500 * time.Millisecond, 305 }) 306 }