github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/provider/common/bootstrap.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package common 5 6 import ( 7 "fmt" 8 "io" 9 "os" 10 "path" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/loggo" 17 "github.com/juju/utils" 18 "github.com/juju/utils/parallel" 19 "github.com/juju/utils/series" 20 "github.com/juju/utils/shell" 21 "github.com/juju/utils/ssh" 22 23 "github.com/juju/juju/agent" 24 "github.com/juju/juju/cloudconfig" 25 "github.com/juju/juju/cloudconfig/cloudinit" 26 "github.com/juju/juju/cloudconfig/instancecfg" 27 "github.com/juju/juju/cloudconfig/sshinit" 28 "github.com/juju/juju/environs" 29 "github.com/juju/juju/environs/config" 30 "github.com/juju/juju/environs/imagemetadata" 31 "github.com/juju/juju/environs/simplestreams" 32 "github.com/juju/juju/instance" 33 "github.com/juju/juju/network" 34 "github.com/juju/juju/status" 35 coretools "github.com/juju/juju/tools" 36 ) 37 38 var logger = loggo.GetLogger("juju.provider.common") 39 40 // Bootstrap is a common implementation of the Bootstrap method defined on 41 // environs.Environ; we strongly recommend that this implementation be used 42 // when writing a new provider. 43 func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 44 ) (*environs.BootstrapResult, error) { 45 result, series, finalizer, err := BootstrapInstance(ctx, env, args) 46 if err != nil { 47 return nil, errors.Trace(err) 48 } 49 50 bsResult := &environs.BootstrapResult{ 51 Arch: *result.Hardware.Arch, 52 Series: series, 53 Finalize: finalizer, 54 } 55 return bsResult, nil 56 } 57 58 // BootstrapInstance creates a new instance with the series of its choice, 59 // constrained to those of the available tools, and 60 // returns the instance result, series, and a function that 61 // must be called to finalize the bootstrap process by transferring 62 // the tools and installing the initial Juju controller. 63 // This method is called by Bootstrap above, which implements environs.Bootstrap, but 64 // is also exported so that providers can manipulate the started instance. 65 func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 66 ) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) { 67 // TODO make safe in the case of racing Bootstraps 68 // If two Bootstraps are called concurrently, there's 69 // no way to make sure that only one succeeds. 70 71 // First thing, ensure we have tools otherwise there's no point. 72 if args.BootstrapSeries != "" { 73 selectedSeries = args.BootstrapSeries 74 } else { 75 selectedSeries = config.PreferredSeries(env.Config()) 76 } 77 availableTools, err := args.AvailableTools.Match(coretools.Filter{ 78 Series: selectedSeries, 79 }) 80 if err != nil { 81 return nil, "", nil, err 82 } 83 84 // Filter image metadata to the selected series. 85 var imageMetadata []*imagemetadata.ImageMetadata 86 seriesVersion, err := series.SeriesVersion(selectedSeries) 87 if err != nil { 88 return nil, "", nil, errors.Trace(err) 89 } 90 for _, m := range args.ImageMetadata { 91 if m.Version != seriesVersion { 92 continue 93 } 94 imageMetadata = append(imageMetadata, m) 95 } 96 97 // Get the bootstrap SSH client. Do this early, so we know 98 // not to bother with any of the below if we can't finish the job. 99 client := ssh.DefaultClient 100 if client == nil { 101 // This should never happen: if we don't have OpenSSH, then 102 // go.crypto/ssh should be used with an auto-generated key. 103 return nil, "", nil, fmt.Errorf("no SSH client available") 104 } 105 106 publicKey, err := simplestreams.UserPublicSigningKey() 107 if err != nil { 108 return nil, "", nil, err 109 } 110 envCfg := env.Config() 111 instanceConfig, err := instancecfg.NewBootstrapInstanceConfig( 112 args.ControllerConfig, args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey, 113 ) 114 if err != nil { 115 return nil, "", nil, err 116 } 117 instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate() 118 instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade() 119 120 instanceConfig.Tags = instancecfg.InstanceTags(envCfg.UUID(), args.ControllerConfig.ControllerUUID(), envCfg, instanceConfig.Jobs) 121 maybeSetBridge := func(icfg *instancecfg.InstanceConfig) { 122 // If we need to override the default bridge name, do it now. When 123 // args.ContainerBridgeName is empty, the default names for LXC 124 // (lxcbr0) and KVM (virbr0) will be used. 125 if args.ContainerBridgeName != "" { 126 logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName) 127 if icfg.AgentEnvironment == nil { 128 icfg.AgentEnvironment = make(map[string]string) 129 } 130 icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName 131 } 132 } 133 maybeSetBridge(instanceConfig) 134 135 cloudRegion := args.CloudName 136 if args.CloudRegion != "" { 137 cloudRegion += "/" + args.CloudRegion 138 } 139 fmt.Fprintf(ctx.GetStderr(), "Launching controller instance(s) on %s...\n", cloudRegion) 140 // Print instance status reports status changes during provisioning. 141 // Note the carriage returns, meaning subsequent prints are to the same 142 // line of stderr, not a new line. 143 instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error { 144 // The data arg is not expected to be used in this case, but 145 // print it, rather than ignore it, if we get something. 146 dataString := "" 147 if len(data) > 0 { 148 dataString = fmt.Sprintf(" %v", data) 149 } 150 fmt.Fprintf(ctx.GetStderr(), " - %s%s\r", info, dataString) 151 return nil 152 } 153 // Likely used after the final instanceStatus call to white-out the 154 // current stderr line before the next use, removing any residual status 155 // reporting output. 156 statusCleanup := func(info string) error { 157 // The leading spaces account for the leading characters 158 // emitted by instanceStatus above. 159 fmt.Fprintf(ctx.GetStderr(), " %s\r", info) 160 return nil 161 } 162 result, err := env.StartInstance(environs.StartInstanceParams{ 163 ControllerUUID: args.ControllerConfig.ControllerUUID(), 164 Constraints: args.BootstrapConstraints, 165 Tools: availableTools, 166 InstanceConfig: instanceConfig, 167 Placement: args.Placement, 168 ImageMetadata: imageMetadata, 169 StatusCallback: instanceStatus, 170 CleanupCallback: statusCleanup, 171 }) 172 if err != nil { 173 return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance") 174 } 175 // We need some padding below to overwrite any previous messages. We'll use a width of 40. 176 msg := fmt.Sprintf(" - %s", result.Instance.Id()) 177 if len(msg) < 40 { 178 padding := make([]string, 40-len(msg)) 179 msg += strings.Join(padding, " ") 180 } 181 fmt.Fprintln(ctx.GetStderr(), msg) 182 183 finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig, opts environs.BootstrapDialOpts) error { 184 icfg.Bootstrap.BootstrapMachineInstanceId = result.Instance.Id() 185 icfg.Bootstrap.BootstrapMachineHardwareCharacteristics = result.Hardware 186 envConfig := env.Config() 187 if result.Config != nil { 188 updated, err := envConfig.Apply(result.Config.UnknownAttrs()) 189 if err != nil { 190 return errors.Trace(err) 191 } 192 envConfig = updated 193 } 194 if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil { 195 return err 196 } 197 maybeSetBridge(icfg) 198 return FinishBootstrap(ctx, client, env, result.Instance, icfg, opts) 199 } 200 return result, selectedSeries, finalize, nil 201 } 202 203 // FinishBootstrap completes the bootstrap process by connecting 204 // to the instance via SSH and carrying out the cloud-config. 205 // 206 // Note: FinishBootstrap is exposed so it can be replaced for testing. 207 var FinishBootstrap = func( 208 ctx environs.BootstrapContext, 209 client ssh.Client, 210 env environs.Environ, 211 inst instance.Instance, 212 instanceConfig *instancecfg.InstanceConfig, 213 opts environs.BootstrapDialOpts, 214 ) error { 215 interrupted := make(chan os.Signal, 1) 216 ctx.InterruptNotify(interrupted) 217 defer ctx.StopInterruptNotify(interrupted) 218 addr, err := WaitSSH( 219 ctx.GetStderr(), 220 interrupted, 221 client, 222 GetCheckNonceCommand(instanceConfig), 223 &RefreshableInstance{inst, env}, 224 opts, 225 ) 226 if err != nil { 227 return err 228 } 229 return ConfigureMachine(ctx, client, addr, instanceConfig) 230 } 231 232 func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string { 233 // Each attempt to connect to an address must verify the machine is the 234 // bootstrap machine by checking its nonce file exists and contains the 235 // nonce in the InstanceConfig. This also blocks sshinit from proceeding 236 // until cloud-init has completed, which is necessary to ensure apt 237 // invocations don't trample each other. 238 nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile)) 239 checkNonceCommand := fmt.Sprintf(` 240 noncefile=%s 241 if [ ! -e "$noncefile" ]; then 242 echo "$noncefile does not exist" >&2 243 exit 1 244 fi 245 content=$(cat $noncefile) 246 if [ "$content" != %s ]; then 247 echo "$noncefile contents do not match machine nonce" >&2 248 exit 1 249 fi 250 `, nonceFile, utils.ShQuote(instanceConfig.MachineNonce)) 251 return checkNonceCommand 252 } 253 254 func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error { 255 // Bootstrap is synchronous, and will spawn a subprocess 256 // to complete the procedure. If the user hits Ctrl-C, 257 // SIGINT is sent to the foreground process attached to 258 // the terminal, which will be the ssh subprocess at this 259 // point. For that reason, we do not call StopInterruptNotify 260 // until this function completes. 261 cloudcfg, err := cloudinit.New(instanceConfig.Series) 262 if err != nil { 263 return errors.Trace(err) 264 } 265 266 // Set packaging update here 267 cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate) 268 cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade) 269 270 udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg) 271 if err != nil { 272 return err 273 } 274 if err := udata.ConfigureJuju(); err != nil { 275 return err 276 } 277 configScript, err := cloudcfg.RenderScript() 278 if err != nil { 279 return err 280 } 281 script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript 282 return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{ 283 Host: "ubuntu@" + host, 284 Client: client, 285 Config: cloudcfg, 286 ProgressWriter: ctx.GetStderr(), 287 Series: instanceConfig.Series, 288 }) 289 } 290 291 // InstanceRefresher is the subet of the Instance interface required 292 // for waiting for SSH access to become availble. 293 type InstanceRefresher interface { 294 // Refresh refreshes the addresses for the instance. 295 Refresh() error 296 297 // Addresses returns the addresses for the instance. 298 // To ensure that the results are up to date, call 299 // Refresh first. 300 Addresses() ([]network.Address, error) 301 302 // Status returns the provider-specific status for the 303 // instance. 304 Status() instance.InstanceStatus 305 } 306 307 type RefreshableInstance struct { 308 instance.Instance 309 Env environs.Environ 310 } 311 312 // Refresh refreshes the addresses for the instance. 313 func (i *RefreshableInstance) Refresh() error { 314 instances, err := i.Env.Instances([]instance.Id{i.Id()}) 315 if err != nil { 316 return errors.Trace(err) 317 } 318 i.Instance = instances[0] 319 return nil 320 } 321 322 type hostChecker struct { 323 addr network.Address 324 client ssh.Client 325 wg *sync.WaitGroup 326 327 // checkDelay is the amount of time to wait between retries. 328 checkDelay time.Duration 329 330 // checkHostScript is executed on the host via SSH. 331 // hostChecker.loop will return once the script 332 // runs without error. 333 checkHostScript string 334 335 // closed is closed to indicate that the host checker should 336 // return, without waiting for the result of any ongoing 337 // attempts. 338 closed <-chan struct{} 339 } 340 341 // Close implements io.Closer, as required by parallel.Try. 342 func (*hostChecker) Close() error { 343 return nil 344 } 345 346 func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) { 347 defer hc.wg.Done() 348 // The value of connectSSH is taken outside the goroutine that may outlive 349 // hostChecker.loop, or we evoke the wrath of the race detector. 350 connectSSH := connectSSH 351 done := make(chan error, 1) 352 var lastErr error 353 for { 354 address := hc.addr.Value 355 go func() { 356 done <- connectSSH(hc.client, address, hc.checkHostScript) 357 }() 358 select { 359 case <-dying: 360 return hc, lastErr 361 case lastErr = <-done: 362 if lastErr == nil { 363 return hc, nil 364 } 365 logger.Debugf("connection attempt for %s failed: %v", address, lastErr) 366 } 367 select { 368 case <-hc.closed: 369 return hc, lastErr 370 case <-dying: 371 case <-time.After(hc.checkDelay): 372 } 373 } 374 } 375 376 type parallelHostChecker struct { 377 *parallel.Try 378 client ssh.Client 379 stderr io.Writer 380 wg sync.WaitGroup 381 382 // active is a map of adresses to channels for addresses actively 383 // being tested. The goroutine testing the address will continue 384 // to attempt connecting to the address until it succeeds, the Try 385 // is killed, or the corresponding channel in this map is closed. 386 active map[network.Address]chan struct{} 387 388 // checkDelay is how long each hostChecker waits between attempts. 389 checkDelay time.Duration 390 391 // checkHostScript is the script to run on each host to check that 392 // it is the host we expect. 393 checkHostScript string 394 } 395 396 func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) { 397 for _, addr := range addrs { 398 if _, ok := p.active[addr]; ok { 399 continue 400 } 401 fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value) 402 closed := make(chan struct{}) 403 hc := &hostChecker{ 404 addr: addr, 405 client: p.client, 406 checkDelay: p.checkDelay, 407 checkHostScript: p.checkHostScript, 408 closed: closed, 409 wg: &p.wg, 410 } 411 p.wg.Add(1) 412 p.active[addr] = closed 413 p.Start(hc.loop) 414 } 415 } 416 417 // Close prevents additional functions from being added to 418 // the Try, and tells each active hostChecker to exit. 419 func (p *parallelHostChecker) Close() error { 420 // We signal each checker to stop and wait for them 421 // each to complete; this allows us to get the error, 422 // as opposed to when using try.Kill which does not 423 // wait for the functions to complete. 424 p.Try.Close() 425 for _, ch := range p.active { 426 close(ch) 427 } 428 return nil 429 } 430 431 // connectSSH is called to connect to the specified host and 432 // execute the "checkHostScript" bash script on it. 433 var connectSSH = func(client ssh.Client, host, checkHostScript string) error { 434 cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil) 435 cmd.Stdin = strings.NewReader(checkHostScript) 436 output, err := cmd.CombinedOutput() 437 if err != nil && len(output) > 0 { 438 err = fmt.Errorf("%s", strings.TrimSpace(string(output))) 439 } 440 return err 441 } 442 443 // WaitSSH waits for the instance to be assigned a routable 444 // address, then waits until we can connect to it via SSH. 445 // 446 // waitSSH attempts on all addresses returned by the instance 447 // in parallel; the first succeeding one wins. We ensure that 448 // private addresses are for the correct machine by checking 449 // the presence of a file on the machine that contains the 450 // machine's nonce. The "checkHostScript" is a bash script 451 // that performs this file check. 452 func WaitSSH( 453 stdErr io.Writer, 454 interrupted <-chan os.Signal, 455 client ssh.Client, 456 checkHostScript string, 457 inst InstanceRefresher, 458 opts environs.BootstrapDialOpts, 459 ) (addr string, err error) { 460 globalTimeout := time.After(opts.Timeout) 461 pollAddresses := time.NewTimer(0) 462 463 // checker checks each address in a loop, in parallel, 464 // until one succeeds, the global timeout is reached, 465 // or the tomb is killed. 466 checker := parallelHostChecker{ 467 Try: parallel.NewTry(0, nil), 468 client: client, 469 stderr: stdErr, 470 active: make(map[network.Address]chan struct{}), 471 checkDelay: opts.RetryDelay, 472 checkHostScript: checkHostScript, 473 } 474 defer checker.wg.Wait() 475 defer checker.Kill() 476 477 fmt.Fprintln(stdErr, "Waiting for address") 478 for { 479 select { 480 case <-pollAddresses.C: 481 pollAddresses.Reset(opts.AddressesDelay) 482 if err := inst.Refresh(); err != nil { 483 return "", fmt.Errorf("refreshing addresses: %v", err) 484 } 485 instanceStatus := inst.Status() 486 if instanceStatus.Status == status.ProvisioningError { 487 if instanceStatus.Message != "" { 488 return "", errors.Errorf("instance provisioning failed (%v)", instanceStatus.Message) 489 } 490 return "", errors.Errorf("instance provisioning failed") 491 } 492 addresses, err := inst.Addresses() 493 if err != nil { 494 return "", fmt.Errorf("getting addresses: %v", err) 495 } 496 checker.UpdateAddresses(addresses) 497 case <-globalTimeout: 498 checker.Close() 499 lastErr := checker.Wait() 500 format := "waited for %v " 501 args := []interface{}{opts.Timeout} 502 if len(checker.active) == 0 { 503 format += "without getting any addresses" 504 } else { 505 format += "without being able to connect" 506 } 507 if lastErr != nil && lastErr != parallel.ErrStopped { 508 format += ": %v" 509 args = append(args, lastErr) 510 } 511 return "", fmt.Errorf(format, args...) 512 case <-interrupted: 513 return "", fmt.Errorf("interrupted") 514 case <-checker.Dead(): 515 result, err := checker.Result() 516 if err != nil { 517 return "", err 518 } 519 return result.(*hostChecker).addr.Value, nil 520 } 521 } 522 }