github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/provider/common/bootstrap.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package common 5 6 import ( 7 "fmt" 8 "io" 9 "os" 10 "path" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/juju/errors" 16 "github.com/juju/loggo" 17 "github.com/juju/utils" 18 "github.com/juju/utils/parallel" 19 "github.com/juju/utils/shell" 20 21 "github.com/juju/juju/agent" 22 "github.com/juju/juju/cloudconfig" 23 "github.com/juju/juju/cloudconfig/cloudinit" 24 "github.com/juju/juju/cloudconfig/instancecfg" 25 "github.com/juju/juju/cloudconfig/sshinit" 26 "github.com/juju/juju/environs" 27 "github.com/juju/juju/environs/config" 28 "github.com/juju/juju/instance" 29 "github.com/juju/juju/network" 30 coretools "github.com/juju/juju/tools" 31 "github.com/juju/juju/utils/ssh" 32 ) 33 34 var logger = loggo.GetLogger("juju.provider.common") 35 36 // Bootstrap is a common implementation of the Bootstrap method defined on 37 // environs.Environ; we strongly recommend that this implementation be used 38 // when writing a new provider. 39 func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 40 ) (arch, series string, _ environs.BootstrapFinalizer, err error) { 41 if result, series, finalizer, err := BootstrapInstance(ctx, env, args); err == nil { 42 return *result.Hardware.Arch, series, finalizer, nil 43 } else { 44 return "", "", nil, err 45 } 46 } 47 48 // BootstrapInstance creates a new instance with the series and architecture 49 // of its choice, constrained to those of the available tools, and 50 // returns the instance result, series, and a function that 51 // must be called to finalize the bootstrap process by transferring 52 // the tools and installing the initial Juju state server. 53 // This method is called by Bootstrap above, which implements environs.Bootstrap, but 54 // is also exported so that providers can manipulate the started instance. 55 func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams, 56 ) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) { 57 // TODO make safe in the case of racing Bootstraps 58 // If two Bootstraps are called concurrently, there's 59 // no way to make sure that only one succeeds. 60 61 // First thing, ensure we have tools otherwise there's no point. 62 series = config.PreferredSeries(env.Config()) 63 availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series}) 64 if err != nil { 65 return nil, "", nil, err 66 } 67 68 // Get the bootstrap SSH client. Do this early, so we know 69 // not to bother with any of the below if we can't finish the job. 70 client := ssh.DefaultClient 71 if client == nil { 72 // This should never happen: if we don't have OpenSSH, then 73 // go.crypto/ssh should be used with an auto-generated key. 74 return nil, "", nil, fmt.Errorf("no SSH client available") 75 } 76 77 instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(args.Constraints, series) 78 if err != nil { 79 return nil, "", nil, err 80 } 81 instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate() 82 instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade() 83 instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs) 84 maybeSetBridge := func(icfg *instancecfg.InstanceConfig) { 85 // If we need to override the default bridge name, do it now. When 86 // args.ContainerBridgeName is empty, the default names for LXC 87 // (lxcbr0) and KVM (virbr0) will be used. 88 if args.ContainerBridgeName != "" { 89 logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName) 90 if icfg.AgentEnvironment == nil { 91 icfg.AgentEnvironment = make(map[string]string) 92 } 93 icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName 94 } 95 } 96 maybeSetBridge(instanceConfig) 97 98 fmt.Fprintln(ctx.GetStderr(), "Launching instance") 99 result, err := env.StartInstance(environs.StartInstanceParams{ 100 Constraints: args.Constraints, 101 Tools: availableTools, 102 InstanceConfig: instanceConfig, 103 Placement: args.Placement, 104 }) 105 if err != nil { 106 return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance") 107 } 108 fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id()) 109 110 finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error { 111 icfg.InstanceId = result.Instance.Id() 112 icfg.HardwareCharacteristics = result.Hardware 113 if err := instancecfg.FinishInstanceConfig(icfg, env.Config()); err != nil { 114 return err 115 } 116 maybeSetBridge(icfg) 117 return FinishBootstrap(ctx, client, env, result.Instance, icfg) 118 } 119 return result, series, finalize, nil 120 } 121 122 // FinishBootstrap completes the bootstrap process by connecting 123 // to the instance via SSH and carrying out the cloud-config. 124 // 125 // Note: FinishBootstrap is exposed so it can be replaced for testing. 126 var FinishBootstrap = func( 127 ctx environs.BootstrapContext, 128 client ssh.Client, 129 env environs.Environ, 130 inst instance.Instance, 131 instanceConfig *instancecfg.InstanceConfig, 132 ) error { 133 interrupted := make(chan os.Signal, 1) 134 ctx.InterruptNotify(interrupted) 135 defer ctx.StopInterruptNotify(interrupted) 136 // Each attempt to connect to an address must verify the machine is the 137 // bootstrap machine by checking its nonce file exists and contains the 138 // nonce in the InstanceConfig. This also blocks sshinit from proceeding 139 // until cloud-init has completed, which is necessary to ensure apt 140 // invocations don't trample each other. 141 nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile)) 142 checkNonceCommand := fmt.Sprintf(` 143 noncefile=%s 144 if [ ! -e "$noncefile" ]; then 145 echo "$noncefile does not exist" >&2 146 exit 1 147 fi 148 content=$(cat $noncefile) 149 if [ "$content" != %s ]; then 150 echo "$noncefile contents do not match machine nonce" >&2 151 exit 1 152 fi 153 `, nonceFile, utils.ShQuote(instanceConfig.MachineNonce)) 154 addr, err := waitSSH( 155 ctx, 156 interrupted, 157 client, 158 checkNonceCommand, 159 &refreshableInstance{inst, env}, 160 instanceConfig.Config.BootstrapSSHOpts(), 161 ) 162 if err != nil { 163 return err 164 } 165 return ConfigureMachine(ctx, client, addr, instanceConfig) 166 } 167 168 func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error { 169 // Bootstrap is synchronous, and will spawn a subprocess 170 // to complete the procedure. If the user hits Ctrl-C, 171 // SIGINT is sent to the foreground process attached to 172 // the terminal, which will be the ssh subprocess at this 173 // point. For that reason, we do not call StopInterruptNotify 174 // until this function completes. 175 cloudcfg, err := cloudinit.New(instanceConfig.Series) 176 if err != nil { 177 return errors.Trace(err) 178 } 179 180 // Set packaging update here 181 cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate) 182 cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade) 183 184 udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg) 185 if err != nil { 186 return err 187 } 188 if err := udata.ConfigureJuju(); err != nil { 189 return err 190 } 191 configScript, err := cloudcfg.RenderScript() 192 if err != nil { 193 return err 194 } 195 script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript 196 return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{ 197 Host: "ubuntu@" + host, 198 Client: client, 199 Config: cloudcfg, 200 ProgressWriter: ctx.GetStderr(), 201 Series: instanceConfig.Series, 202 }) 203 } 204 205 type addresser interface { 206 // Refresh refreshes the addresses for the instance. 207 Refresh() error 208 209 // Addresses returns the addresses for the instance. 210 // To ensure that the results are up to date, call 211 // Refresh first. 212 Addresses() ([]network.Address, error) 213 } 214 215 type refreshableInstance struct { 216 instance.Instance 217 env environs.Environ 218 } 219 220 // Refresh refreshes the addresses for the instance. 221 func (i *refreshableInstance) Refresh() error { 222 instances, err := i.env.Instances([]instance.Id{i.Id()}) 223 if err != nil { 224 return errors.Trace(err) 225 } 226 i.Instance = instances[0] 227 return nil 228 } 229 230 type hostChecker struct { 231 addr network.Address 232 client ssh.Client 233 wg *sync.WaitGroup 234 235 // checkDelay is the amount of time to wait between retries. 236 checkDelay time.Duration 237 238 // checkHostScript is executed on the host via SSH. 239 // hostChecker.loop will return once the script 240 // runs without error. 241 checkHostScript string 242 243 // closed is closed to indicate that the host checker should 244 // return, without waiting for the result of any ongoing 245 // attempts. 246 closed <-chan struct{} 247 } 248 249 // Close implements io.Closer, as required by parallel.Try. 250 func (*hostChecker) Close() error { 251 return nil 252 } 253 254 func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) { 255 defer hc.wg.Done() 256 // The value of connectSSH is taken outside the goroutine that may outlive 257 // hostChecker.loop, or we evoke the wrath of the race detector. 258 connectSSH := connectSSH 259 done := make(chan error, 1) 260 var lastErr error 261 for { 262 address := hc.addr.Value 263 go func() { 264 done <- connectSSH(hc.client, address, hc.checkHostScript) 265 }() 266 select { 267 case <-hc.closed: 268 return hc, lastErr 269 case <-dying: 270 return hc, lastErr 271 case lastErr = <-done: 272 if lastErr == nil { 273 return hc, nil 274 } else { 275 logger.Debugf("connection attempt for %s failed: %v", address, lastErr) 276 } 277 } 278 select { 279 case <-hc.closed: 280 case <-dying: 281 case <-time.After(hc.checkDelay): 282 } 283 } 284 } 285 286 type parallelHostChecker struct { 287 *parallel.Try 288 client ssh.Client 289 stderr io.Writer 290 wg sync.WaitGroup 291 292 // active is a map of adresses to channels for addresses actively 293 // being tested. The goroutine testing the address will continue 294 // to attempt connecting to the address until it succeeds, the Try 295 // is killed, or the corresponding channel in this map is closed. 296 active map[network.Address]chan struct{} 297 298 // checkDelay is how long each hostChecker waits between attempts. 299 checkDelay time.Duration 300 301 // checkHostScript is the script to run on each host to check that 302 // it is the host we expect. 303 checkHostScript string 304 } 305 306 func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) { 307 for _, addr := range addrs { 308 if _, ok := p.active[addr]; ok { 309 continue 310 } 311 fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value) 312 closed := make(chan struct{}) 313 hc := &hostChecker{ 314 addr: addr, 315 client: p.client, 316 checkDelay: p.checkDelay, 317 checkHostScript: p.checkHostScript, 318 closed: closed, 319 wg: &p.wg, 320 } 321 p.wg.Add(1) 322 p.active[addr] = closed 323 p.Start(hc.loop) 324 } 325 } 326 327 // Close prevents additional functions from being added to 328 // the Try, and tells each active hostChecker to exit. 329 func (p *parallelHostChecker) Close() error { 330 // We signal each checker to stop and wait for them 331 // each to complete; this allows us to get the error, 332 // as opposed to when using try.Kill which does not 333 // wait for the functions to complete. 334 p.Try.Close() 335 for _, ch := range p.active { 336 close(ch) 337 } 338 return nil 339 } 340 341 // connectSSH is called to connect to the specified host and 342 // execute the "checkHostScript" bash script on it. 343 var connectSSH = func(client ssh.Client, host, checkHostScript string) error { 344 cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil) 345 cmd.Stdin = strings.NewReader(checkHostScript) 346 output, err := cmd.CombinedOutput() 347 if err != nil && len(output) > 0 { 348 err = fmt.Errorf("%s", strings.TrimSpace(string(output))) 349 } 350 return err 351 } 352 353 // waitSSH waits for the instance to be assigned a routable 354 // address, then waits until we can connect to it via SSH. 355 // 356 // waitSSH attempts on all addresses returned by the instance 357 // in parallel; the first succeeding one wins. We ensure that 358 // private addresses are for the correct machine by checking 359 // the presence of a file on the machine that contains the 360 // machine's nonce. The "checkHostScript" is a bash script 361 // that performs this file check. 362 func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) { 363 globalTimeout := time.After(timeout.Timeout) 364 pollAddresses := time.NewTimer(0) 365 366 // checker checks each address in a loop, in parallel, 367 // until one succeeds, the global timeout is reached, 368 // or the tomb is killed. 369 checker := parallelHostChecker{ 370 Try: parallel.NewTry(0, nil), 371 client: client, 372 stderr: ctx.GetStderr(), 373 active: make(map[network.Address]chan struct{}), 374 checkDelay: timeout.RetryDelay, 375 checkHostScript: checkHostScript, 376 } 377 defer checker.wg.Wait() 378 defer checker.Kill() 379 380 fmt.Fprintln(ctx.GetStderr(), "Waiting for address") 381 for { 382 select { 383 case <-pollAddresses.C: 384 pollAddresses.Reset(timeout.AddressesDelay) 385 if err := inst.Refresh(); err != nil { 386 return "", fmt.Errorf("refreshing addresses: %v", err) 387 } 388 addresses, err := inst.Addresses() 389 if err != nil { 390 return "", fmt.Errorf("getting addresses: %v", err) 391 } 392 checker.UpdateAddresses(addresses) 393 case <-globalTimeout: 394 checker.Close() 395 lastErr := checker.Wait() 396 format := "waited for %v " 397 args := []interface{}{timeout.Timeout} 398 if len(checker.active) == 0 { 399 format += "without getting any addresses" 400 } else { 401 format += "without being able to connect" 402 } 403 if lastErr != nil && lastErr != parallel.ErrStopped { 404 format += ": %v" 405 args = append(args, lastErr) 406 } 407 return "", fmt.Errorf(format, args...) 408 case <-interrupted: 409 return "", fmt.Errorf("interrupted") 410 case <-checker.Dead(): 411 result, err := checker.Result() 412 if err != nil { 413 return "", err 414 } 415 return result.(*hostChecker).addr.Value, nil 416 } 417 } 418 }