github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"time"

	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/shell"

	coreCloudinit "github.com/juju/juju/cloudinit"
	"github.com/juju/juju/cloudinit/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/bootstrap"
	"github.com/juju/juju/environs/cloudinit"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/utils/ssh"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams) (err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	var inst instance.Instance
	defer func() { handleBootstrapError(err, ctx, inst, env) }()

	// First, ensure we have tools; otherwise there's no point continuing.
	selectedTools, err := EnsureBootstrapTools(ctx, env, config.PreferredSeries(env.Config()), args.Constraints.Arch)
	if err != nil {
		return err
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return fmt.Errorf("no SSH client available")
	}

	privateKey, err := GenerateSystemSSHKey(env)
	if err != nil {
		return err
	}
	machineConfig := environs.NewBootstrapMachineConfig(privateKey)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	inst, hw, _, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:   args.Constraints,
		Tools:         selectedTools,
		MachineConfig: machineConfig,
		Placement:     args.Placement,
	})
	if err != nil {
		return fmt.Errorf("cannot start bootstrap instance: %v", err)
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", inst.Id())
	machineConfig.InstanceId = inst.Id()
	machineConfig.HardwareCharacteristics = hw

	err = bootstrap.SaveState(
		env.Storage(),
		&bootstrap.BootstrapState{
			StateInstances: []instance.Id{inst.Id()},
		})
	if err != nil {
		return fmt.Errorf("cannot save state: %v", err)
	}
	return FinishBootstrap(ctx, client, inst, machineConfig)
}
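
// For illustration: a provider would typically satisfy environs.Environ by
// delegating its Bootstrap method to the implementation above. A minimal
// sketch, assuming a hypothetical provider type *environ that implements
// the rest of environs.Environ:
//
//	func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) error {
//		return common.Bootstrap(ctx, e, args)
//	}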

// GenerateSystemSSHKey creates a new key for the system identity. The
// authorized_keys in the environment config is updated to include the public
// key for the generated key.
func GenerateSystemSSHKey(env environs.Environ) (privateKey string, err error) {
	logger.Debugf("generate a system ssh key")
	// Create a new system ssh key and add that to the authorized keys.
	privateKey, publicKey, err := ssh.GenerateKey(config.JujuSystemKey)
	if err != nil {
		return "", fmt.Errorf("failed to create system key: %v", err)
	}
	authorizedKeys := config.ConcatAuthKeys(env.Config().AuthorizedKeys(), publicKey)
	newConfig, err := env.Config().Apply(map[string]interface{}{
		config.AuthKeysConfig: authorizedKeys,
	})
	if err != nil {
		return "", fmt.Errorf("failed to create new config: %v", err)
	}
	if err = env.SetConfig(newConfig); err != nil {
		return "", fmt.Errorf("failed to set new config: %v", err)
	}
	return privateKey, nil
}

// handleBootstrapError cleans up after a failed bootstrap.
func handleBootstrapError(err error, ctx environs.BootstrapContext, inst instance.Instance, env environs.Environ) {
	if err == nil {
		return
	}

	logger.Errorf("bootstrap failed: %v", err)
	ch := make(chan os.Signal, 1)
	ctx.InterruptNotify(ch)
	defer ctx.StopInterruptNotify(ch)
	defer close(ch)
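	// Consume any interrupts delivered while cleanup runs: each one is
	// acknowledged on stderr but does not abort, so stopping the instance
	// and deleting the state file are never left half done.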
	go func() {
		for range ch {
			fmt.Fprintln(ctx.GetStderr(), "Cleaning up failed bootstrap")
		}
	}()

	if inst != nil {
		fmt.Fprintln(ctx.GetStderr(), "Stopping instance...")
		if stoperr := env.StopInstances(inst.Id()); stoperr != nil {
			logger.Errorf("cannot stop failed bootstrap instance %q: %v", inst.Id(), stoperr)
		} else {
			// Set to nil so we know we can safely delete the state file.
			inst = nil
		}
	}
	// We only delete the bootstrap state file if either we didn't
	// start an instance, or we managed to cleanly stop it.
	if inst == nil {
		if rmerr := bootstrap.DeleteStateFile(env.Storage()); rmerr != nil {
			logger.Errorf("cannot delete bootstrap state file: %v", rmerr)
		}
	}
}

// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and carrying out the cloud-init configuration.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	// Each attempt to connect to an address must verify the machine is the
	// bootstrap machine by checking that its nonce file exists and contains
	// the nonce in the MachineConfig. This also blocks sshinit from proceeding
	// until cloud-init has completed, which is necessary to ensure apt
	// invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat "$noncefile")
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(machineConfig.MachineNonce))
	addr, err := waitSSH(
		ctx,
		interrupted,
		client,
		checkNonceCommand,
		inst,
		machineConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg := coreCloudinit.New()
	if err := cloudinit.ConfigureJuju(machineConfig, cloudcfg); err != nil {
		return err
	}
	configScript, err := sshinit.ConfigureScript(cloudcfg)
	if err != nil {
		return err
	}
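	// Prefix the configure script with one that dumps the cloud-init
	// output log if the run fails, so there is something to diagnose
	// provisioning failures with.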
	script := shell.DumpFileOnErrorScript(machineConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + addr,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
	})
}

type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	// Capture the value of connectSSH outside the goroutine, which may
	// outlive hostChecker.loop, or we invoke the wrath of the race detector.
	connectSSH := connectSSH
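	// done is buffered so that the connect goroutine can always deliver
	// its result and exit, even if loop has already returned because
	// hc.closed or dying was closed.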
	done := make(chan error, 1)
	var lastErr error
	for {
		go func() {
			done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
		}
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}

type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
		}
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for each of them to
	// complete; this allows us to get the error, as opposed to using
	// try.Kill, which does not wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}

// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH attempts to connect to all addresses returned by the
// instance, in parallel; the first address to respond wins. We
// ensure that private addresses belong to the correct machine by
// checking for the presence of a file on the machine that contains
// the machine's nonce. The "checkHostScript" is a bash script that
// performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	// The zero-duration timer fires immediately, so the first address
	// poll happens straight away; it is then reset to AddressesDelay
	// between subsequent polls.
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the Try is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}

// EnsureBootstrapTools finds tools, syncing with an external tools source as
// necessary; it then selects the newest tools to bootstrap with, and sets
// agent-version.
func EnsureBootstrapTools(ctx environs.BootstrapContext, env environs.Environ, series string, arch *string) (coretools.List, error) {
	possibleTools, err := bootstrap.EnsureToolsAvailability(ctx, env, series, arch)
	if err != nil {
		return nil, err
	}
	return bootstrap.SetBootstrapTools(env, possibleTools)
}
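
// For testing, FinishBootstrap (and likewise connectSSH) can be swapped
// out, since both are package-level variables. A minimal sketch of such a
// stub, assuming a test with access to these variables:
//
//	restore := FinishBootstrap
//	FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client,
//		inst instance.Instance, mcfg *cloudinit.MachineConfig) error {
//		return nil // skip SSH provisioning entirely
//	}
//	defer func() { FinishBootstrap = restore }()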