github.com/mwhudson/juju@v0.0.0-20160512215208-90ff01f3497f/provider/common/bootstrap.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package common 5 6 import ( 7 "fmt" 8 "io" 9 "os" 10 "path" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/loggo" 17 "github.com/juju/utils" 18 "github.com/juju/utils/parallel" 19 "github.com/juju/utils/series" 20 "github.com/juju/utils/shell" 21 "github.com/juju/utils/ssh" 22 23 "github.com/juju/juju/agent" 24 "github.com/juju/juju/cloudconfig" 25 "github.com/juju/juju/cloudconfig/cloudinit" 26 "github.com/juju/juju/cloudconfig/instancecfg" 27 "github.com/juju/juju/cloudconfig/sshinit" 28 "github.com/juju/juju/environs" 29 "github.com/juju/juju/environs/config" 30 "github.com/juju/juju/environs/imagemetadata" 31 "github.com/juju/juju/environs/simplestreams" 32 "github.com/juju/juju/instance" 33 "github.com/juju/juju/network" 34 "github.com/juju/juju/status" 35 coretools "github.com/juju/juju/tools" 36 ) 37 38 var logger = loggo.GetLogger("juju.provider.common") 39 40 // Bootstrap is a common implementation of the Bootstrap method defined on 41 // environs.Environ; we strongly recommend that this implementation be used 42 // when writing a new provider. 43 func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 44 ) (*environs.BootstrapResult, error) { 45 result, series, finalizer, err := BootstrapInstance(ctx, env, args) 46 if err != nil { 47 return nil, errors.Trace(err) 48 } 49 50 bsResult := &environs.BootstrapResult{ 51 Arch: *result.Hardware.Arch, 52 Series: series, 53 Finalize: finalizer, 54 } 55 return bsResult, nil 56 } 57 58 // BootstrapInstance creates a new instance with the series and architecture 59 // of its choice, constrained to those of the available tools, and 60 // returns the instance result, series, and a function that 61 // must be called to finalize the bootstrap process by transferring 62 // the tools and installing the initial Juju controller. 63 // This method is called by Bootstrap above, which implements environs.Bootstrap, but 64 // is also exported so that providers can manipulate the started instance. 65 func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 66 ) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) { 67 // TODO make safe in the case of racing Bootstraps 68 // If two Bootstraps are called concurrently, there's 69 // no way to make sure that only one succeeds. 70 71 // First thing, ensure we have tools otherwise there's no point. 72 if args.BootstrapSeries != "" { 73 selectedSeries = args.BootstrapSeries 74 } else { 75 selectedSeries = config.PreferredSeries(env.Config()) 76 } 77 availableTools, err := args.AvailableTools.Match(coretools.Filter{ 78 Series: selectedSeries, 79 }) 80 if err != nil { 81 return nil, "", nil, err 82 } 83 84 // Filter image metadata to the selected series. 85 var imageMetadata []*imagemetadata.ImageMetadata 86 seriesVersion, err := series.SeriesVersion(selectedSeries) 87 if err != nil { 88 return nil, "", nil, errors.Trace(err) 89 } 90 for _, m := range args.ImageMetadata { 91 if m.Version != seriesVersion { 92 continue 93 } 94 imageMetadata = append(imageMetadata, m) 95 } 96 97 // Get the bootstrap SSH client. Do this early, so we know 98 // not to bother with any of the below if we can't finish the job. 99 client := ssh.DefaultClient 100 if client == nil { 101 // This should never happen: if we don't have OpenSSH, then 102 // go.crypto/ssh should be used with an auto-generated key. 103 return nil, "", nil, fmt.Errorf("no SSH client available") 104 } 105 106 publicKey, err := simplestreams.UserPublicSigningKey() 107 if err != nil { 108 return nil, "", nil, err 109 } 110 instanceConfig, err := instancecfg.NewBootstrapInstanceConfig( 111 args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey, 112 ) 113 if err != nil { 114 return nil, "", nil, err 115 } 116 instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate() 117 instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade() 118 instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs) 119 maybeSetBridge := func(icfg *instancecfg.InstanceConfig) { 120 // If we need to override the default bridge name, do it now. When 121 // args.ContainerBridgeName is empty, the default names for LXC 122 // (lxcbr0) and KVM (virbr0) will be used. 123 if args.ContainerBridgeName != "" { 124 logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName) 125 if icfg.AgentEnvironment == nil { 126 icfg.AgentEnvironment = make(map[string]string) 127 } 128 icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName 129 } 130 } 131 maybeSetBridge(instanceConfig) 132 133 fmt.Fprintln(ctx.GetStderr(), "Launching instance") 134 instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error { 135 fmt.Fprintf(ctx.GetStderr(), "%s \r", info) 136 return nil 137 } 138 result, err := env.StartInstance(environs.StartInstanceParams{ 139 Constraints: args.BootstrapConstraints, 140 Tools: availableTools, 141 InstanceConfig: instanceConfig, 142 Placement: args.Placement, 143 ImageMetadata: imageMetadata, 144 StatusCallback: instanceStatus, 145 }) 146 if err != nil { 147 return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance") 148 } 149 fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id()) 150 151 finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error { 152 icfg.InstanceId = result.Instance.Id() 153 icfg.HardwareCharacteristics = result.Hardware 154 envConfig := env.Config() 155 if result.Config != nil { 156 updated, err := envConfig.Apply(result.Config.UnknownAttrs()) 157 if err != nil { 158 return errors.Trace(err) 159 } 160 envConfig = updated 161 } 162 if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil { 163 return err 164 } 165 maybeSetBridge(icfg) 166 return FinishBootstrap(ctx, client, env, result.Instance, icfg) 167 } 168 return result, selectedSeries, finalize, nil 169 } 170 171 // FinishBootstrap completes the bootstrap process by connecting 172 // to the instance via SSH and carrying out the cloud-config. 173 // 174 // Note: FinishBootstrap is exposed so it can be replaced for testing. 175 var FinishBootstrap = func( 176 ctx environs.BootstrapContext, 177 client ssh.Client, 178 env environs.Environ, 179 inst instance.Instance, 180 instanceConfig *instancecfg.InstanceConfig, 181 ) error { 182 interrupted := make(chan os.Signal, 1) 183 ctx.InterruptNotify(interrupted) 184 defer ctx.StopInterruptNotify(interrupted) 185 addr, err := WaitSSH( 186 ctx.GetStderr(), 187 interrupted, 188 client, 189 GetCheckNonceCommand(instanceConfig), 190 &RefreshableInstance{inst, env}, 191 instanceConfig.Config.BootstrapSSHOpts(), 192 ) 193 if err != nil { 194 return err 195 } 196 return ConfigureMachine(ctx, client, addr, instanceConfig) 197 } 198 199 func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string { 200 // Each attempt to connect to an address must verify the machine is the 201 // bootstrap machine by checking its nonce file exists and contains the 202 // nonce in the InstanceConfig. This also blocks sshinit from proceeding 203 // until cloud-init has completed, which is necessary to ensure apt 204 // invocations don't trample each other. 205 nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile)) 206 checkNonceCommand := fmt.Sprintf(` 207 noncefile=%s 208 if [ ! -e "$noncefile" ]; then 209 echo "$noncefile does not exist" >&2 210 exit 1 211 fi 212 content=$(cat $noncefile) 213 if [ "$content" != %s ]; then 214 echo "$noncefile contents do not match machine nonce" >&2 215 exit 1 216 fi 217 `, nonceFile, utils.ShQuote(instanceConfig.MachineNonce)) 218 return checkNonceCommand 219 } 220 221 func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error { 222 // Bootstrap is synchronous, and will spawn a subprocess 223 // to complete the procedure. If the user hits Ctrl-C, 224 // SIGINT is sent to the foreground process attached to 225 // the terminal, which will be the ssh subprocess at this 226 // point. For that reason, we do not call StopInterruptNotify 227 // until this function completes. 228 cloudcfg, err := cloudinit.New(instanceConfig.Series) 229 if err != nil { 230 return errors.Trace(err) 231 } 232 233 // Set packaging update here 234 cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate) 235 cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade) 236 237 udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg) 238 if err != nil { 239 return err 240 } 241 if err := udata.ConfigureJuju(); err != nil { 242 return err 243 } 244 configScript, err := cloudcfg.RenderScript() 245 if err != nil { 246 return err 247 } 248 script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript 249 return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{ 250 Host: "ubuntu@" + host, 251 Client: client, 252 Config: cloudcfg, 253 ProgressWriter: ctx.GetStderr(), 254 Series: instanceConfig.Series, 255 }) 256 } 257 258 type Addresser interface { 259 // Refresh refreshes the addresses for the instance. 260 Refresh() error 261 262 // Addresses returns the addresses for the instance. 263 // To ensure that the results are up to date, call 264 // Refresh first. 265 Addresses() ([]network.Address, error) 266 } 267 268 type RefreshableInstance struct { 269 instance.Instance 270 Env environs.Environ 271 } 272 273 // Refresh refreshes the addresses for the instance. 274 func (i *RefreshableInstance) Refresh() error { 275 instances, err := i.Env.Instances([]instance.Id{i.Id()}) 276 if err != nil { 277 return errors.Trace(err) 278 } 279 i.Instance = instances[0] 280 return nil 281 } 282 283 type hostChecker struct { 284 addr network.Address 285 client ssh.Client 286 wg *sync.WaitGroup 287 288 // checkDelay is the amount of time to wait between retries. 289 checkDelay time.Duration 290 291 // checkHostScript is executed on the host via SSH. 292 // hostChecker.loop will return once the script 293 // runs without error. 294 checkHostScript string 295 296 // closed is closed to indicate that the host checker should 297 // return, without waiting for the result of any ongoing 298 // attempts. 299 closed <-chan struct{} 300 } 301 302 // Close implements io.Closer, as required by parallel.Try. 303 func (*hostChecker) Close() error { 304 return nil 305 } 306 307 func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) { 308 defer hc.wg.Done() 309 // The value of connectSSH is taken outside the goroutine that may outlive 310 // hostChecker.loop, or we evoke the wrath of the race detector. 311 connectSSH := connectSSH 312 done := make(chan error, 1) 313 var lastErr error 314 for { 315 address := hc.addr.Value 316 go func() { 317 done <- connectSSH(hc.client, address, hc.checkHostScript) 318 }() 319 select { 320 case <-dying: 321 return hc, lastErr 322 case lastErr = <-done: 323 if lastErr == nil { 324 return hc, nil 325 } 326 logger.Debugf("connection attempt for %s failed: %v", address, lastErr) 327 } 328 select { 329 case <-hc.closed: 330 return hc, lastErr 331 case <-dying: 332 case <-time.After(hc.checkDelay): 333 } 334 } 335 } 336 337 type parallelHostChecker struct { 338 *parallel.Try 339 client ssh.Client 340 stderr io.Writer 341 wg sync.WaitGroup 342 343 // active is a map of adresses to channels for addresses actively 344 // being tested. The goroutine testing the address will continue 345 // to attempt connecting to the address until it succeeds, the Try 346 // is killed, or the corresponding channel in this map is closed. 347 active map[network.Address]chan struct{} 348 349 // checkDelay is how long each hostChecker waits between attempts. 350 checkDelay time.Duration 351 352 // checkHostScript is the script to run on each host to check that 353 // it is the host we expect. 354 checkHostScript string 355 } 356 357 func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) { 358 for _, addr := range addrs { 359 if _, ok := p.active[addr]; ok { 360 continue 361 } 362 fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value) 363 closed := make(chan struct{}) 364 hc := &hostChecker{ 365 addr: addr, 366 client: p.client, 367 checkDelay: p.checkDelay, 368 checkHostScript: p.checkHostScript, 369 closed: closed, 370 wg: &p.wg, 371 } 372 p.wg.Add(1) 373 p.active[addr] = closed 374 p.Start(hc.loop) 375 } 376 } 377 378 // Close prevents additional functions from being added to 379 // the Try, and tells each active hostChecker to exit. 380 func (p *parallelHostChecker) Close() error { 381 // We signal each checker to stop and wait for them 382 // each to complete; this allows us to get the error, 383 // as opposed to when using try.Kill which does not 384 // wait for the functions to complete. 385 p.Try.Close() 386 for _, ch := range p.active { 387 close(ch) 388 } 389 return nil 390 } 391 392 // connectSSH is called to connect to the specified host and 393 // execute the "checkHostScript" bash script on it. 394 var connectSSH = func(client ssh.Client, host, checkHostScript string) error { 395 cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil) 396 cmd.Stdin = strings.NewReader(checkHostScript) 397 output, err := cmd.CombinedOutput() 398 if err != nil && len(output) > 0 { 399 err = fmt.Errorf("%s", strings.TrimSpace(string(output))) 400 } 401 return err 402 } 403 404 // WaitSSH waits for the instance to be assigned a routable 405 // address, then waits until we can connect to it via SSH. 406 // 407 // waitSSH attempts on all addresses returned by the instance 408 // in parallel; the first succeeding one wins. We ensure that 409 // private addresses are for the correct machine by checking 410 // the presence of a file on the machine that contains the 411 // machine's nonce. The "checkHostScript" is a bash script 412 // that performs this file check. 413 func WaitSSH(stdErr io.Writer, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst Addresser, timeout config.SSHTimeoutOpts) (addr string, err error) { 414 globalTimeout := time.After(timeout.Timeout) 415 pollAddresses := time.NewTimer(0) 416 417 // checker checks each address in a loop, in parallel, 418 // until one succeeds, the global timeout is reached, 419 // or the tomb is killed. 420 checker := parallelHostChecker{ 421 Try: parallel.NewTry(0, nil), 422 client: client, 423 stderr: stdErr, 424 active: make(map[network.Address]chan struct{}), 425 checkDelay: timeout.RetryDelay, 426 checkHostScript: checkHostScript, 427 } 428 defer checker.wg.Wait() 429 defer checker.Kill() 430 431 fmt.Fprintln(stdErr, "Waiting for address") 432 for { 433 select { 434 case <-pollAddresses.C: 435 pollAddresses.Reset(timeout.AddressesDelay) 436 if err := inst.Refresh(); err != nil { 437 return "", fmt.Errorf("refreshing addresses: %v", err) 438 } 439 addresses, err := inst.Addresses() 440 if err != nil { 441 return "", fmt.Errorf("getting addresses: %v", err) 442 } 443 checker.UpdateAddresses(addresses) 444 case <-globalTimeout: 445 checker.Close() 446 lastErr := checker.Wait() 447 format := "waited for %v " 448 args := []interface{}{timeout.Timeout} 449 if len(checker.active) == 0 { 450 format += "without getting any addresses" 451 } else { 452 format += "without being able to connect" 453 } 454 if lastErr != nil && lastErr != parallel.ErrStopped { 455 format += ": %v" 456 args = append(args, lastErr) 457 } 458 return "", fmt.Errorf(format, args...) 459 case <-interrupted: 460 return "", fmt.Errorf("interrupted") 461 case <-checker.Dead(): 462 result, err := checker.Result() 463 if err != nil { 464 return "", err 465 } 466 return result.(*hostChecker).addr.Value, nil 467 } 468 } 469 }