github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/provider/common/bootstrap.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package common 5 6 import ( 7 "fmt" 8 "io" 9 "os" 10 "path" 11 "strings" 12 "time" 13 14 "github.com/juju/loggo" 15 16 coreCloudinit "launchpad.net/juju-core/cloudinit" 17 "launchpad.net/juju-core/cloudinit/sshinit" 18 "launchpad.net/juju-core/constraints" 19 "launchpad.net/juju-core/environs" 20 "launchpad.net/juju-core/environs/bootstrap" 21 "launchpad.net/juju-core/environs/cloudinit" 22 "launchpad.net/juju-core/environs/config" 23 "launchpad.net/juju-core/instance" 24 coretools "launchpad.net/juju-core/tools" 25 "launchpad.net/juju-core/utils" 26 "launchpad.net/juju-core/utils/parallel" 27 "launchpad.net/juju-core/utils/ssh" 28 ) 29 30 var logger = loggo.GetLogger("juju.provider.common") 31 32 // Bootstrap is a common implementation of the Bootstrap method defined on 33 // environs.Environ; we strongly recommend that this implementation be used 34 // when writing a new provider. 35 func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, cons constraints.Value) (err error) { 36 // TODO make safe in the case of racing Bootstraps 37 // If two Bootstraps are called concurrently, there's 38 // no way to make sure that only one succeeds. 39 40 var inst instance.Instance 41 defer func() { handleBootstrapError(err, ctx, inst, env) }() 42 43 // Get the bootstrap SSH client. Do this early, so we know 44 // not to bother with any of the below if we can't finish the job. 45 client := ssh.DefaultClient 46 if client == nil { 47 // This should never happen: if we don't have OpenSSH, then 48 // go.crypto/ssh should be used with an auto-generated key. 49 return fmt.Errorf("no SSH client available") 50 } 51 52 // Create an empty bootstrap state file so we can get its URL. 53 // It will be updated with the instance id and hardware characteristics 54 // after the bootstrap instance is started. 55 stateFileURL, err := bootstrap.CreateStateFile(env.Storage()) 56 if err != nil { 57 return err 58 } 59 60 privateKey, err := GenerateSystemSSHKey(env) 61 if err != nil { 62 return err 63 } 64 machineConfig := environs.NewBootstrapMachineConfig(stateFileURL, privateKey) 65 66 selectedTools, err := EnsureBootstrapTools(env, env.Config().DefaultSeries(), cons.Arch) 67 if err != nil { 68 return err 69 } 70 71 fmt.Fprintln(ctx.GetStderr(), "Launching instance") 72 inst, hw, err := env.StartInstance(cons, selectedTools, machineConfig) 73 if err != nil { 74 return fmt.Errorf("cannot start bootstrap instance: %v", err) 75 } 76 fmt.Fprintf(ctx.GetStderr(), " - %s\n", inst.Id()) 77 78 var characteristics []instance.HardwareCharacteristics 79 if hw != nil { 80 characteristics = []instance.HardwareCharacteristics{*hw} 81 } 82 err = bootstrap.SaveState( 83 env.Storage(), 84 &bootstrap.BootstrapState{ 85 StateInstances: []instance.Id{inst.Id()}, 86 Characteristics: characteristics, 87 }) 88 if err != nil { 89 return fmt.Errorf("cannot save state: %v", err) 90 } 91 return FinishBootstrap(ctx, client, inst, machineConfig) 92 } 93 94 // GenerateSystemSSHKey creates a new key for the system identity. The 95 // authorized_keys in the environment config is updated to include the public 96 // key for the generated key. 97 func GenerateSystemSSHKey(env environs.Environ) (privateKey string, err error) { 98 logger.Debugf("generate a system ssh key") 99 // Create a new system ssh key and add that to the authorized keys. 100 privateKey, publicKey, err := ssh.GenerateKey(config.JujuSystemKey) 101 if err != nil { 102 return "", fmt.Errorf("failed to create system key: %v", err) 103 } 104 authorized_keys := config.ConcatAuthKeys(env.Config().AuthorizedKeys(), publicKey) 105 newConfig, err := env.Config().Apply(map[string]interface{}{ 106 config.AuthKeysConfig: authorized_keys, 107 }) 108 if err != nil { 109 return "", fmt.Errorf("failed to create new config: %v", err) 110 } 111 if err = env.SetConfig(newConfig); err != nil { 112 return "", fmt.Errorf("failed to set new config: %v", err) 113 } 114 return privateKey, nil 115 } 116 117 // handelBootstrapError cleans up after a failed bootstrap. 118 func handleBootstrapError(err error, ctx environs.BootstrapContext, inst instance.Instance, env environs.Environ) { 119 if err == nil { 120 return 121 } 122 123 logger.Errorf("bootstrap failed: %v", err) 124 ch := make(chan os.Signal, 1) 125 ctx.InterruptNotify(ch) 126 defer ctx.StopInterruptNotify(ch) 127 defer close(ch) 128 go func() { 129 for _ = range ch { 130 fmt.Fprintln(ctx.GetStderr(), "Cleaning up failed bootstrap") 131 } 132 }() 133 134 if inst != nil { 135 fmt.Fprintln(ctx.GetStderr(), "Stopping instance...") 136 if stoperr := env.StopInstances([]instance.Instance{inst}); stoperr != nil { 137 logger.Errorf("cannot stop failed bootstrap instance %q: %v", inst.Id(), stoperr) 138 } else { 139 // set to nil so we know we can safely delete the state file 140 inst = nil 141 } 142 } 143 // We only delete the bootstrap state file if either we didn't 144 // start an instance, or we managed to cleanly stop it. 145 if inst == nil { 146 if rmerr := bootstrap.DeleteStateFile(env.Storage()); rmerr != nil { 147 logger.Errorf("cannot delete bootstrap state file: %v", rmerr) 148 } 149 } 150 } 151 152 // FinishBootstrap completes the bootstrap process by connecting 153 // to the instance via SSH and carrying out the cloud-config. 154 // 155 // Note: FinishBootstrap is exposed so it can be replaced for testing. 156 var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error { 157 interrupted := make(chan os.Signal, 1) 158 ctx.InterruptNotify(interrupted) 159 defer ctx.StopInterruptNotify(interrupted) 160 // Each attempt to connect to an address must verify the machine is the 161 // bootstrap machine by checking its nonce file exists and contains the 162 // nonce in the MachineConfig. This also blocks sshinit from proceeding 163 // until cloud-init has completed, which is necessary to ensure apt 164 // invocations don't trample each other. 165 nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile)) 166 checkNonceCommand := fmt.Sprintf(` 167 noncefile=%s 168 if [ ! -e "$noncefile" ]; then 169 echo "$noncefile does not exist" >&2 170 exit 1 171 fi 172 content=$(cat $noncefile) 173 if [ "$content" != %s ]; then 174 echo "$noncefile contents do not match machine nonce" >&2 175 exit 1 176 fi 177 `, nonceFile, utils.ShQuote(machineConfig.MachineNonce)) 178 addr, err := waitSSH( 179 ctx, 180 interrupted, 181 client, 182 checkNonceCommand, 183 inst, 184 machineConfig.Config.BootstrapSSHOpts(), 185 ) 186 if err != nil { 187 return err 188 } 189 // Bootstrap is synchronous, and will spawn a subprocess 190 // to complete the procedure. If the user hits Ctrl-C, 191 // SIGINT is sent to the foreground process attached to 192 // the terminal, which will be the ssh subprocess at this 193 // point. For that reason, we do not call StopInterruptNotify 194 // until this function completes. 195 cloudcfg := coreCloudinit.New() 196 if err := cloudinit.ConfigureJuju(machineConfig, cloudcfg); err != nil { 197 return err 198 } 199 return sshinit.Configure(sshinit.ConfigureParams{ 200 Host: "ubuntu@" + addr, 201 Client: client, 202 Config: cloudcfg, 203 ProgressWriter: ctx.GetStderr(), 204 }) 205 } 206 207 type addresser interface { 208 // Refresh refreshes the addresses for the instance. 209 Refresh() error 210 211 // Addresses returns the addresses for the instance. 212 // To ensure that the results are up to date, call 213 // Refresh first. 214 Addresses() ([]instance.Address, error) 215 } 216 217 type hostChecker struct { 218 addr instance.Address 219 client ssh.Client 220 221 // checkDelay is the amount of time to wait between retries. 222 checkDelay time.Duration 223 224 // checkHostScript is executed on the host via SSH. 225 // hostChecker.loop will return once the script 226 // runs without error. 227 checkHostScript string 228 229 // closed is closed to indicate that the host checker should 230 // return, without waiting for the result of any ongoing 231 // attempts. 232 closed <-chan struct{} 233 } 234 235 // Close implements io.Closer, as required by parallel.Try. 236 func (*hostChecker) Close() error { 237 return nil 238 } 239 240 func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) { 241 // The value of connectSSH is taken outside the goroutine that may outlive 242 // hostChecker.loop, or we evoke the wrath of the race detector. 243 connectSSH := connectSSH 244 done := make(chan error, 1) 245 var lastErr error 246 for { 247 go func() { 248 done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript) 249 }() 250 select { 251 case <-hc.closed: 252 return hc, lastErr 253 case <-dying: 254 return hc, lastErr 255 case lastErr = <-done: 256 if lastErr == nil { 257 return hc, nil 258 } 259 } 260 select { 261 case <-hc.closed: 262 case <-dying: 263 case <-time.After(hc.checkDelay): 264 } 265 } 266 } 267 268 type parallelHostChecker struct { 269 *parallel.Try 270 client ssh.Client 271 stderr io.Writer 272 273 // active is a map of adresses to channels for addresses actively 274 // being tested. The goroutine testing the address will continue 275 // to attempt connecting to the address until it succeeds, the Try 276 // is killed, or the corresponding channel in this map is closed. 277 active map[instance.Address]chan struct{} 278 279 // checkDelay is how long each hostChecker waits between attempts. 280 checkDelay time.Duration 281 282 // checkHostScript is the script to run on each host to check that 283 // it is the host we expect. 284 checkHostScript string 285 } 286 287 func (p *parallelHostChecker) UpdateAddresses(addrs []instance.Address) { 288 for _, addr := range addrs { 289 if _, ok := p.active[addr]; ok { 290 continue 291 } 292 fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value) 293 closed := make(chan struct{}) 294 hc := &hostChecker{ 295 addr: addr, 296 client: p.client, 297 checkDelay: p.checkDelay, 298 checkHostScript: p.checkHostScript, 299 closed: closed, 300 } 301 p.active[addr] = closed 302 p.Start(hc.loop) 303 } 304 } 305 306 // Close prevents additional functions from being added to 307 // the Try, and tells each active hostChecker to exit. 308 func (p *parallelHostChecker) Close() error { 309 // We signal each checker to stop and wait for them 310 // each to complete; this allows us to get the error, 311 // as opposed to when using try.Kill which does not 312 // wait for the functions to complete. 313 p.Try.Close() 314 for _, ch := range p.active { 315 close(ch) 316 } 317 return nil 318 } 319 320 // connectSSH is called to connect to the specified host and 321 // execute the "checkHostScript" bash script on it. 322 var connectSSH = func(client ssh.Client, host, checkHostScript string) error { 323 cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil) 324 cmd.Stdin = strings.NewReader(checkHostScript) 325 output, err := cmd.CombinedOutput() 326 if err != nil && len(output) > 0 { 327 err = fmt.Errorf("%s", strings.TrimSpace(string(output))) 328 } 329 return err 330 } 331 332 // waitSSH waits for the instance to be assigned a routable 333 // address, then waits until we can connect to it via SSH. 334 // 335 // waitSSH attempts on all addresses returned by the instance 336 // in parallel; the first succeeding one wins. We ensure that 337 // private addresses are for the correct machine by checking 338 // the presence of a file on the machine that contains the 339 // machine's nonce. The "checkHostScript" is a bash script 340 // that performs this file check. 341 func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) { 342 globalTimeout := time.After(timeout.Timeout) 343 pollAddresses := time.NewTimer(0) 344 345 // checker checks each address in a loop, in parallel, 346 // until one succeeds, the global timeout is reached, 347 // or the tomb is killed. 348 checker := parallelHostChecker{ 349 Try: parallel.NewTry(0, nil), 350 client: client, 351 stderr: ctx.GetStderr(), 352 active: make(map[instance.Address]chan struct{}), 353 checkDelay: timeout.RetryDelay, 354 checkHostScript: checkHostScript, 355 } 356 defer checker.Kill() 357 358 fmt.Fprintln(ctx.GetStderr(), "Waiting for address") 359 for { 360 select { 361 case <-pollAddresses.C: 362 pollAddresses.Reset(timeout.AddressesDelay) 363 if err := inst.Refresh(); err != nil { 364 return "", fmt.Errorf("refreshing addresses: %v", err) 365 } 366 addresses, err := inst.Addresses() 367 if err != nil { 368 return "", fmt.Errorf("getting addresses: %v", err) 369 } 370 checker.UpdateAddresses(addresses) 371 case <-globalTimeout: 372 checker.Close() 373 lastErr := checker.Wait() 374 format := "waited for %v " 375 args := []interface{}{timeout.Timeout} 376 if len(checker.active) == 0 { 377 format += "without getting any addresses" 378 } else { 379 format += "without being able to connect" 380 } 381 if lastErr != nil && lastErr != parallel.ErrStopped { 382 format += ": %v" 383 args = append(args, lastErr) 384 } 385 return "", fmt.Errorf(format, args...) 386 case <-interrupted: 387 return "", fmt.Errorf("interrupted") 388 case <-checker.Dead(): 389 result, err := checker.Result() 390 if err != nil { 391 return "", err 392 } 393 return result.(*hostChecker).addr.Value, nil 394 } 395 } 396 } 397 398 // EnsureBootstrapTools finds tools, syncing with an external tools source as 399 // necessary; it then selects the newest tools to bootstrap with, and sets 400 // agent-version. 401 func EnsureBootstrapTools(env environs.Environ, series string, arch *string) (coretools.List, error) { 402 possibleTools, err := bootstrap.EnsureToolsAvailability(env, series, arch) 403 if err != nil { 404 return nil, err 405 } 406 return bootstrap.SetBootstrapTools(env, possibleTools) 407 } 408 409 // EnsureNotBootstrapped returns null if the environment is not bootstrapped, 410 // and an error if it is or if the function was not able to tell. 411 func EnsureNotBootstrapped(env environs.Environ) error { 412 _, err := bootstrap.LoadState(env.Storage()) 413 // If there is no error loading the bootstrap state, then we are 414 // bootstrapped. 415 if err == nil { 416 return fmt.Errorf("environment is already bootstrapped") 417 } 418 if err == environs.ErrNotBootstrapped { 419 return nil 420 } 421 return err 422 }