github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/provider/common/bootstrap.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package common 5 6 import ( 7 "fmt" 8 "io" 9 "os" 10 "path" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/loggo" 17 "github.com/juju/utils" 18 "github.com/juju/utils/parallel" 19 "github.com/juju/utils/series" 20 "github.com/juju/utils/shell" 21 "github.com/juju/utils/ssh" 22 23 "github.com/juju/juju/agent" 24 "github.com/juju/juju/cloudconfig" 25 "github.com/juju/juju/cloudconfig/cloudinit" 26 "github.com/juju/juju/cloudconfig/instancecfg" 27 "github.com/juju/juju/cloudconfig/sshinit" 28 "github.com/juju/juju/environs" 29 "github.com/juju/juju/environs/config" 30 "github.com/juju/juju/environs/imagemetadata" 31 "github.com/juju/juju/environs/simplestreams" 32 "github.com/juju/juju/instance" 33 "github.com/juju/juju/network" 34 "github.com/juju/juju/status" 35 coretools "github.com/juju/juju/tools" 36 ) 37 38 var logger = loggo.GetLogger("juju.provider.common") 39 40 // Bootstrap is a common implementation of the Bootstrap method defined on 41 // environs.Environ; we strongly recommend that this implementation be used 42 // when writing a new provider. 43 func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 44 ) (*environs.BootstrapResult, error) { 45 result, series, finalizer, err := BootstrapInstance(ctx, env, args) 46 if err != nil { 47 return nil, errors.Trace(err) 48 } 49 50 bsResult := &environs.BootstrapResult{ 51 Arch: *result.Hardware.Arch, 52 Series: series, 53 Finalize: finalizer, 54 } 55 return bsResult, nil 56 } 57 58 // BootstrapInstance creates a new instance with the series of its choice, 59 // constrained to those of the available tools, and 60 // returns the instance result, series, and a function that 61 // must be called to finalize the bootstrap process by transferring 62 // the tools and installing the initial Juju controller. 63 // This method is called by Bootstrap above, which implements environs.Bootstrap, but 64 // is also exported so that providers can manipulate the started instance. 65 func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 66 ) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) { 67 // TODO make safe in the case of racing Bootstraps 68 // If two Bootstraps are called concurrently, there's 69 // no way to make sure that only one succeeds. 70 71 // First thing, ensure we have tools otherwise there's no point. 72 if args.BootstrapSeries != "" { 73 selectedSeries = args.BootstrapSeries 74 } else { 75 selectedSeries = config.PreferredSeries(env.Config()) 76 } 77 availableTools, err := args.AvailableTools.Match(coretools.Filter{ 78 Series: selectedSeries, 79 }) 80 if err != nil { 81 return nil, "", nil, err 82 } 83 84 // Filter image metadata to the selected series. 85 var imageMetadata []*imagemetadata.ImageMetadata 86 seriesVersion, err := series.SeriesVersion(selectedSeries) 87 if err != nil { 88 return nil, "", nil, errors.Trace(err) 89 } 90 for _, m := range args.ImageMetadata { 91 if m.Version != seriesVersion { 92 continue 93 } 94 imageMetadata = append(imageMetadata, m) 95 } 96 97 // Get the bootstrap SSH client. Do this early, so we know 98 // not to bother with any of the below if we can't finish the job. 99 client := ssh.DefaultClient 100 if client == nil { 101 // This should never happen: if we don't have OpenSSH, then 102 // go.crypto/ssh should be used with an auto-generated key. 103 return nil, "", nil, fmt.Errorf("no SSH client available") 104 } 105 106 publicKey, err := simplestreams.UserPublicSigningKey() 107 if err != nil { 108 return nil, "", nil, err 109 } 110 envCfg := env.Config() 111 instanceConfig, err := instancecfg.NewBootstrapInstanceConfig( 112 args.ControllerConfig, args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey, 113 ) 114 if err != nil { 115 return nil, "", nil, err 116 } 117 instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate() 118 instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade() 119 120 instanceConfig.Tags = instancecfg.InstanceTags(envCfg.UUID(), args.ControllerConfig.ControllerUUID(), envCfg, instanceConfig.Jobs) 121 maybeSetBridge := func(icfg *instancecfg.InstanceConfig) { 122 // If we need to override the default bridge name, do it now. When 123 // args.ContainerBridgeName is empty, the default names for LXC 124 // (lxcbr0) and KVM (virbr0) will be used. 125 if args.ContainerBridgeName != "" { 126 logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName) 127 if icfg.AgentEnvironment == nil { 128 icfg.AgentEnvironment = make(map[string]string) 129 } 130 icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName 131 } 132 } 133 maybeSetBridge(instanceConfig) 134 135 bootstrapMsg := env.BootstrapMessage() 136 if bootstrapMsg != "" { 137 ctx.Infof(bootstrapMsg) 138 } 139 140 cloudRegion := args.CloudName 141 if args.CloudRegion != "" { 142 cloudRegion += "/" + args.CloudRegion 143 } 144 fmt.Fprintf(ctx.GetStderr(), "Launching controller instance(s) on %s...\n", cloudRegion) 145 // Print instance status reports status changes during provisioning. 146 // Note the carriage returns, meaning subsequent prints are to the same 147 // line of stderr, not a new line. 148 instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error { 149 // The data arg is not expected to be used in this case, but 150 // print it, rather than ignore it, if we get something. 151 dataString := "" 152 if len(data) > 0 { 153 dataString = fmt.Sprintf(" %v", data) 154 } 155 fmt.Fprintf(ctx.GetStderr(), " - %s%s\r", info, dataString) 156 return nil 157 } 158 // Likely used after the final instanceStatus call to white-out the 159 // current stderr line before the next use, removing any residual status 160 // reporting output. 161 statusCleanup := func(info string) error { 162 // The leading spaces account for the leading characters 163 // emitted by instanceStatus above. 164 fmt.Fprintf(ctx.GetStderr(), " %s\r", info) 165 return nil 166 } 167 result, err := env.StartInstance(environs.StartInstanceParams{ 168 ControllerUUID: args.ControllerConfig.ControllerUUID(), 169 Constraints: args.BootstrapConstraints, 170 Tools: availableTools, 171 InstanceConfig: instanceConfig, 172 Placement: args.Placement, 173 ImageMetadata: imageMetadata, 174 StatusCallback: instanceStatus, 175 CleanupCallback: statusCleanup, 176 }) 177 if err != nil { 178 return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance") 179 } 180 181 msg := fmt.Sprintf(" - %s (%s)", result.Instance.Id(), formatHardware(result.Hardware)) 182 // We need some padding below to overwrite any previous messages. 183 if len(msg) < 40 { 184 padding := make([]string, 40-len(msg)) 185 msg += strings.Join(padding, " ") 186 } 187 fmt.Fprintln(ctx.GetStderr(), msg) 188 189 finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig, opts environs.BootstrapDialOpts) error { 190 icfg.Bootstrap.BootstrapMachineInstanceId = result.Instance.Id() 191 icfg.Bootstrap.BootstrapMachineHardwareCharacteristics = result.Hardware 192 envConfig := env.Config() 193 if result.Config != nil { 194 updated, err := envConfig.Apply(result.Config.UnknownAttrs()) 195 if err != nil { 196 return errors.Trace(err) 197 } 198 envConfig = updated 199 } 200 if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil { 201 return err 202 } 203 maybeSetBridge(icfg) 204 return FinishBootstrap(ctx, client, env, result.Instance, icfg, opts) 205 } 206 return result, selectedSeries, finalize, nil 207 } 208 209 func formatHardware(hw *instance.HardwareCharacteristics) string { 210 if hw == nil { 211 return "" 212 } 213 out := make([]string, 0, 3) 214 if hw.Arch != nil && *hw.Arch != "" { 215 out = append(out, fmt.Sprintf("arch=%s", *hw.Arch)) 216 } 217 if hw.Mem != nil && *hw.Mem > 0 { 218 out = append(out, fmt.Sprintf("mem=%s", formatMemory(*hw.Mem))) 219 } 220 if hw.CpuCores != nil && *hw.CpuCores > 0 { 221 out = append(out, fmt.Sprintf("cores=%d", *hw.CpuCores)) 222 } 223 return strings.Join(out, " ") 224 } 225 226 func formatMemory(m uint64) string { 227 if m < 1024 { 228 return fmt.Sprintf("%dM", m) 229 } 230 s := fmt.Sprintf("%.1f", float32(m)/1024.0) 231 return strings.TrimSuffix(s, ".0") + "G" 232 } 233 234 // FinishBootstrap completes the bootstrap process by connecting 235 // to the instance via SSH and carrying out the cloud-config. 236 // 237 // Note: FinishBootstrap is exposed so it can be replaced for testing. 238 var FinishBootstrap = func( 239 ctx environs.BootstrapContext, 240 client ssh.Client, 241 env environs.Environ, 242 inst instance.Instance, 243 instanceConfig *instancecfg.InstanceConfig, 244 opts environs.BootstrapDialOpts, 245 ) error { 246 interrupted := make(chan os.Signal, 1) 247 ctx.InterruptNotify(interrupted) 248 defer ctx.StopInterruptNotify(interrupted) 249 addr, err := WaitSSH( 250 ctx.GetStderr(), 251 interrupted, 252 client, 253 GetCheckNonceCommand(instanceConfig), 254 &RefreshableInstance{inst, env}, 255 opts, 256 ) 257 if err != nil { 258 return err 259 } 260 return ConfigureMachine(ctx, client, addr, instanceConfig) 261 } 262 263 func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string { 264 // Each attempt to connect to an address must verify the machine is the 265 // bootstrap machine by checking its nonce file exists and contains the 266 // nonce in the InstanceConfig. This also blocks sshinit from proceeding 267 // until cloud-init has completed, which is necessary to ensure apt 268 // invocations don't trample each other. 269 nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile)) 270 checkNonceCommand := fmt.Sprintf(` 271 noncefile=%s 272 if [ ! -e "$noncefile" ]; then 273 echo "$noncefile does not exist" >&2 274 exit 1 275 fi 276 content=$(cat $noncefile) 277 if [ "$content" != %s ]; then 278 echo "$noncefile contents do not match machine nonce" >&2 279 exit 1 280 fi 281 `, nonceFile, utils.ShQuote(instanceConfig.MachineNonce)) 282 return checkNonceCommand 283 } 284 285 func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error { 286 // Bootstrap is synchronous, and will spawn a subprocess 287 // to complete the procedure. If the user hits Ctrl-C, 288 // SIGINT is sent to the foreground process attached to 289 // the terminal, which will be the ssh subprocess at this 290 // point. For that reason, we do not call StopInterruptNotify 291 // until this function completes. 292 cloudcfg, err := cloudinit.New(instanceConfig.Series) 293 if err != nil { 294 return errors.Trace(err) 295 } 296 297 // Set packaging update here 298 cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate) 299 cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade) 300 301 udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg) 302 if err != nil { 303 return err 304 } 305 if err := udata.ConfigureJuju(); err != nil { 306 return err 307 } 308 configScript, err := cloudcfg.RenderScript() 309 if err != nil { 310 return err 311 } 312 script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript 313 return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{ 314 Host: "ubuntu@" + host, 315 Client: client, 316 Config: cloudcfg, 317 ProgressWriter: ctx.GetStderr(), 318 Series: instanceConfig.Series, 319 }) 320 } 321 322 // InstanceRefresher is the subet of the Instance interface required 323 // for waiting for SSH access to become availble. 324 type InstanceRefresher interface { 325 // Refresh refreshes the addresses for the instance. 326 Refresh() error 327 328 // Addresses returns the addresses for the instance. 329 // To ensure that the results are up to date, call 330 // Refresh first. 331 Addresses() ([]network.Address, error) 332 333 // Status returns the provider-specific status for the 334 // instance. 335 Status() instance.InstanceStatus 336 } 337 338 type RefreshableInstance struct { 339 instance.Instance 340 Env environs.Environ 341 } 342 343 // Refresh refreshes the addresses for the instance. 344 func (i *RefreshableInstance) Refresh() error { 345 instances, err := i.Env.Instances([]instance.Id{i.Id()}) 346 if err != nil { 347 return errors.Trace(err) 348 } 349 i.Instance = instances[0] 350 return nil 351 } 352 353 type hostChecker struct { 354 addr network.Address 355 client ssh.Client 356 wg *sync.WaitGroup 357 358 // checkDelay is the amount of time to wait between retries. 359 checkDelay time.Duration 360 361 // checkHostScript is executed on the host via SSH. 362 // hostChecker.loop will return once the script 363 // runs without error. 364 checkHostScript string 365 366 // closed is closed to indicate that the host checker should 367 // return, without waiting for the result of any ongoing 368 // attempts. 369 closed <-chan struct{} 370 } 371 372 // Close implements io.Closer, as required by parallel.Try. 373 func (*hostChecker) Close() error { 374 return nil 375 } 376 377 func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) { 378 defer hc.wg.Done() 379 // The value of connectSSH is taken outside the goroutine that may outlive 380 // hostChecker.loop, or we evoke the wrath of the race detector. 381 connectSSH := connectSSH 382 done := make(chan error, 1) 383 var lastErr error 384 for { 385 address := hc.addr.Value 386 go func() { 387 done <- connectSSH(hc.client, address, hc.checkHostScript) 388 }() 389 select { 390 case <-dying: 391 return hc, lastErr 392 case lastErr = <-done: 393 if lastErr == nil { 394 return hc, nil 395 } 396 logger.Debugf("connection attempt for %s failed: %v", address, lastErr) 397 } 398 select { 399 case <-hc.closed: 400 return hc, lastErr 401 case <-dying: 402 case <-time.After(hc.checkDelay): 403 } 404 } 405 } 406 407 type parallelHostChecker struct { 408 *parallel.Try 409 client ssh.Client 410 stderr io.Writer 411 wg sync.WaitGroup 412 413 // active is a map of adresses to channels for addresses actively 414 // being tested. The goroutine testing the address will continue 415 // to attempt connecting to the address until it succeeds, the Try 416 // is killed, or the corresponding channel in this map is closed. 417 active map[network.Address]chan struct{} 418 419 // checkDelay is how long each hostChecker waits between attempts. 420 checkDelay time.Duration 421 422 // checkHostScript is the script to run on each host to check that 423 // it is the host we expect. 424 checkHostScript string 425 } 426 427 func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) { 428 for _, addr := range addrs { 429 if _, ok := p.active[addr]; ok { 430 continue 431 } 432 fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value) 433 closed := make(chan struct{}) 434 hc := &hostChecker{ 435 addr: addr, 436 client: p.client, 437 checkDelay: p.checkDelay, 438 checkHostScript: p.checkHostScript, 439 closed: closed, 440 wg: &p.wg, 441 } 442 p.wg.Add(1) 443 p.active[addr] = closed 444 p.Start(hc.loop) 445 } 446 } 447 448 // Close prevents additional functions from being added to 449 // the Try, and tells each active hostChecker to exit. 450 func (p *parallelHostChecker) Close() error { 451 // We signal each checker to stop and wait for them 452 // each to complete; this allows us to get the error, 453 // as opposed to when using try.Kill which does not 454 // wait for the functions to complete. 455 p.Try.Close() 456 for _, ch := range p.active { 457 close(ch) 458 } 459 return nil 460 } 461 462 // connectSSH is called to connect to the specified host and 463 // execute the "checkHostScript" bash script on it. 464 var connectSSH = func(client ssh.Client, host, checkHostScript string) error { 465 cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil) 466 cmd.Stdin = strings.NewReader(checkHostScript) 467 output, err := cmd.CombinedOutput() 468 if err != nil && len(output) > 0 { 469 err = fmt.Errorf("%s", strings.TrimSpace(string(output))) 470 } 471 return err 472 } 473 474 // WaitSSH waits for the instance to be assigned a routable 475 // address, then waits until we can connect to it via SSH. 476 // 477 // waitSSH attempts on all addresses returned by the instance 478 // in parallel; the first succeeding one wins. We ensure that 479 // private addresses are for the correct machine by checking 480 // the presence of a file on the machine that contains the 481 // machine's nonce. The "checkHostScript" is a bash script 482 // that performs this file check. 483 func WaitSSH( 484 stdErr io.Writer, 485 interrupted <-chan os.Signal, 486 client ssh.Client, 487 checkHostScript string, 488 inst InstanceRefresher, 489 opts environs.BootstrapDialOpts, 490 ) (addr string, err error) { 491 globalTimeout := time.After(opts.Timeout) 492 pollAddresses := time.NewTimer(0) 493 494 // checker checks each address in a loop, in parallel, 495 // until one succeeds, the global timeout is reached, 496 // or the tomb is killed. 497 checker := parallelHostChecker{ 498 Try: parallel.NewTry(0, nil), 499 client: client, 500 stderr: stdErr, 501 active: make(map[network.Address]chan struct{}), 502 checkDelay: opts.RetryDelay, 503 checkHostScript: checkHostScript, 504 } 505 defer checker.wg.Wait() 506 defer checker.Kill() 507 508 fmt.Fprintln(stdErr, "Waiting for address") 509 for { 510 select { 511 case <-pollAddresses.C: 512 pollAddresses.Reset(opts.AddressesDelay) 513 if err := inst.Refresh(); err != nil { 514 return "", fmt.Errorf("refreshing addresses: %v", err) 515 } 516 instanceStatus := inst.Status() 517 if instanceStatus.Status == status.ProvisioningError { 518 if instanceStatus.Message != "" { 519 return "", errors.Errorf("instance provisioning failed (%v)", instanceStatus.Message) 520 } 521 return "", errors.Errorf("instance provisioning failed") 522 } 523 addresses, err := inst.Addresses() 524 if err != nil { 525 return "", fmt.Errorf("getting addresses: %v", err) 526 } 527 checker.UpdateAddresses(addresses) 528 case <-globalTimeout: 529 checker.Close() 530 lastErr := checker.Wait() 531 format := "waited for %v " 532 args := []interface{}{opts.Timeout} 533 if len(checker.active) == 0 { 534 format += "without getting any addresses" 535 } else { 536 format += "without being able to connect" 537 } 538 if lastErr != nil && lastErr != parallel.ErrStopped { 539 format += ": %v" 540 args = append(args, lastErr) 541 } 542 return "", fmt.Errorf(format, args...) 543 case <-interrupted: 544 return "", fmt.Errorf("interrupted") 545 case <-checker.Dead(): 546 result, err := checker.Result() 547 if err != nil { 548 return "", err 549 } 550 return result.(*hostChecker).addr.Value, nil 551 } 552 } 553 }