github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/series"
	"github.com/juju/utils/shell"
	"github.com/juju/utils/ssh"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/environs/imagemetadata"
	"github.com/juju/juju/environs/simplestreams"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	"github.com/juju/juju/status"
	coretools "github.com/juju/juju/tools"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (*environs.BootstrapResult, error) {
	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
	if err != nil {
		return nil, errors.Trace(err)
	}

	bsResult := &environs.BootstrapResult{
		Arch:     *result.Hardware.Arch,
		Series:   series,
		Finalize: finalizer,
	}
	return bsResult, nil
}
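// The intended pattern, per the doc comment above, is for a provider's
// Environ to delegate straight to this function. A minimal sketch, assuming
// a hypothetical provider type "environ" that implements environs.Environ:
//
//	func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (*environs.BootstrapResult, error) {
//		return common.Bootstrap(ctx, e, args)
//	}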
// BootstrapInstance creates a new instance with the series and architecture
// of its choice, constrained to those of the available tools, and
// returns the instance result, series, and a function that
// must be called to finalize the bootstrap process by transferring
// the tools and installing the initial Juju controller.
// This method is called by Bootstrap above, which implements environs.Bootstrap, but
// is also exported so that providers can manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	// First, ensure we have tools; otherwise there's no point.
	if args.BootstrapSeries != "" {
		selectedSeries = args.BootstrapSeries
	} else {
		selectedSeries = config.PreferredSeries(env.Config())
	}
	availableTools, err := args.AvailableTools.Match(coretools.Filter{
		Series: selectedSeries,
	})
	if err != nil {
		return nil, "", nil, err
	}

	// Filter image metadata to the selected series.
	var imageMetadata []*imagemetadata.ImageMetadata
	seriesVersion, err := series.SeriesVersion(selectedSeries)
	if err != nil {
		return nil, "", nil, errors.Trace(err)
	}
	for _, m := range args.ImageMetadata {
		if m.Version != seriesVersion {
			continue
		}
		imageMetadata = append(imageMetadata, m)
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	publicKey, err := simplestreams.UserPublicSigningKey()
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(
		args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey,
	)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error {
		fmt.Fprintf(ctx.GetStderr(), "%s \r", info)
		return nil
	}
	result, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:    args.BootstrapConstraints,
		Tools:          availableTools,
		InstanceConfig: instanceConfig,
		Placement:      args.Placement,
		ImageMetadata:  imageMetadata,
		StatusCallback: instanceStatus,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
		icfg.InstanceId = result.Instance.Id()
		icfg.HardwareCharacteristics = result.Hardware
		envConfig := env.Config()
		if result.Config != nil {
			updated, err := envConfig.Apply(result.Config.UnknownAttrs())
			if err != nil {
				return errors.Trace(err)
			}
			envConfig = updated
		}
		if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, env, result.Instance, icfg)
	}
	return result, selectedSeries, finalize, nil
}
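// BootstrapInstance is exported, per its doc comment, so that a provider can
// adjust the started instance before finalizing. A minimal sketch, mirroring
// what Bootstrap above does, with the provider-specific step marked as a
// hypothetical placeholder:
//
//	result, series, finalize, err := common.BootstrapInstance(ctx, env, args)
//	if err != nil {
//		return nil, errors.Trace(err)
//	}
//	// ... provider-specific manipulation of result.Instance goes here ...
//	return &environs.BootstrapResult{
//		Arch:     *result.Hardware.Arch,
//		Series:   series,
//		Finalize: finalize,
//	}, nil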
// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and carrying out the cloud-config.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(
	ctx environs.BootstrapContext,
	client ssh.Client,
	env environs.Environ,
	inst instance.Instance,
	instanceConfig *instancecfg.InstanceConfig,
) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	addr, err := WaitSSH(
		ctx.GetStderr(),
		interrupted,
		client,
		GetCheckNonceCommand(instanceConfig),
		&RefreshableInstance{inst, env},
		instanceConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}

// GetCheckNonceCommand returns a shell script that verifies a host is the
// bootstrap machine by comparing its nonce file with the nonce recorded in
// the given InstanceConfig.
func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string {
	// Each attempt to connect to an address must verify the machine is the
	// bootstrap machine by checking its nonce file exists and contains the
	// nonce in the InstanceConfig. This also blocks sshinit from proceeding
	// until cloud-init has completed, which is necessary to ensure apt
	// invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
noncefile=%s
if [ ! -e "$noncefile" ]; then
	echo "$noncefile does not exist" >&2
	exit 1
fi
content=$(cat "$noncefile")
if [ "$content" != %s ]; then
	echo "$noncefile contents do not match machine nonce" >&2
	exit 1
fi
`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
	return checkNonceCommand
}

// ConfigureMachine connects to the given host over SSH and runs the rendered
// cloud-config script to install and start the Juju agent.
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Set packaging update here.
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}

// Addresser provides the addresses of an instance, refreshed on demand.
type Addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}

// RefreshableInstance is an instance.Instance that can refresh its addresses
// from the environ that started it.
type RefreshableInstance struct {
	instance.Instance
	Env environs.Environ
}
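// A compile-time assertion (illustrative; not in the original file) that
// *RefreshableInstance satisfies Addresser: Addresses comes from the
// embedded instance.Instance, and Refresh from the method below:
//
//	var _ Addresser = (*RefreshableInstance)(nil)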
// Refresh refreshes the addresses for the instance.
func (i *RefreshableInstance) Refresh() error {
	instances, err := i.Env.Instances([]instance.Id{i.Id()})
	if err != nil {
		return errors.Trace(err)
	}
	i.Instance = instances[0]
	return nil
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is taken outside the goroutine that may outlive
	// hostChecker.loop, or we invoke the wrath of the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}
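// connectSSH (declared below) is a package-level var; the snapshot taken in
// hostChecker.loop above suggests it is swapped out elsewhere, most likely in
// tests, following the same pattern as FinishBootstrap. A minimal sketch of
// such a stub, assuming nothing beyond the declared signature (hypothetical):
//
//	connectSSH = func(client ssh.Client, host, checkHostScript string) error {
//		return nil // treat every address as immediately reachable
//	}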
type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}

// WaitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// WaitSSH attempts to connect to all addresses returned by the instance
// in parallel; the first address to succeed wins. We ensure that
// private addresses are for the correct machine by checking
// the presence of a file on the machine that contains the
// machine's nonce. The "checkHostScript" is a bash script
// that performs this file check.
func WaitSSH(stdErr io.Writer, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst Addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the tomb is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          stdErr,
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(stdErr, "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
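// FinishBootstrap is declared above as a package variable "so it can be
// replaced for testing". A minimal sketch of such an override (hypothetical;
// the no-op body is for illustration only):
//
//	common.FinishBootstrap = func(
//		ctx environs.BootstrapContext,
//		client ssh.Client,
//		env environs.Environ,
//		inst instance.Instance,
//		icfg *instancecfg.InstanceConfig,
//	) error {
//		return nil // skip SSH provisioning entirely
//	}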