// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/shell"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/utils/ssh"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (arch, series string, _ environs.BootstrapFinalizer, err error) {
	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
	if err != nil {
		return "", "", nil, err
	}
	return *result.Hardware.Arch, series, finalizer, nil
}
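// Illustrative sketch (not part of the original file): a provider would
// typically satisfy environs.Environ by delegating its Bootstrap method to
// the helper above. The "environ" receiver type below is hypothetical.
//
//	func (e *environ) Bootstrap(
//		ctx environs.BootstrapContext, args environs.BootstrapParams,
//	) (string, string, environs.BootstrapFinalizer, error) {
//		return common.Bootstrap(ctx, e, args)
//	}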
// BootstrapInstance creates a new instance with the series and architecture
// of its choice, constrained to those of the available tools, and
// returns the instance result, series, and a function that
// must be called to finalize the bootstrap process by transferring
// the tools and installing the initial Juju state server.
// This method is called by Bootstrap above, which implements environs.Bootstrap,
// but is also exported so that providers can manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) {
	// TODO: make safe in the case of racing Bootstraps. If two
	// Bootstraps are called concurrently, there's no way to make
	// sure that only one succeeds.

	// First thing, ensure we have tools; otherwise there's no point.
	series = config.PreferredSeries(env.Config())
	availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series})
	if err != nil {
		return nil, "", nil, err
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(args.Constraints, series)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	result, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:    args.Constraints,
		Tools:          availableTools,
		InstanceConfig: instanceConfig,
		Placement:      args.Placement,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
		icfg.InstanceId = result.Instance.Id()
		icfg.HardwareCharacteristics = result.Hardware
		if err := instancecfg.FinishInstanceConfig(icfg, env.Config()); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, result.Instance, icfg)
	}
	return result, series, finalize, nil
}

// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and carrying out the cloud-config.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, instanceConfig *instancecfg.InstanceConfig) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	// Each attempt to connect to an address must verify that the machine is
	// the bootstrap machine by checking that its nonce file exists and
	// contains the nonce in the InstanceConfig. This also blocks sshinit
	// from proceeding until cloud-init has completed, which is necessary to
	// ensure that apt invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat "$noncefile")
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
	addr, err := waitSSH(
		ctx,
		interrupted,
		client,
		checkNonceCommand,
		inst,
		instanceConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}
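// Illustrative sketch (hypothetical test code, not part of the original
// file): because FinishBootstrap is a package-level variable, tests can
// replace it to avoid a real SSH round trip, restoring it afterwards:
//
//	restore := common.FinishBootstrap
//	common.FinishBootstrap = func(
//		ctx environs.BootstrapContext, client ssh.Client,
//		inst instance.Instance, icfg *instancecfg.InstanceConfig,
//	) error {
//		return nil // pretend the instance was configured successfully
//	}
//	defer func() { common.FinishBootstrap = restore }()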
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Set package update/upgrade behaviour here.
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}

type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is captured outside the goroutine, which may
	// outlive hostChecker.loop, or we evoke the wrath of the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}
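// Illustrative sketch (simplified pattern, not part of the original file):
// loop above runs the blocking SSH attempt in its own goroutine and selects
// on the result alongside the cancellation channels, so a hung connection
// attempt cannot block shutdown. The names below are hypothetical:
//
//	done := make(chan error, 1) // buffered: the goroutine never blocks on send
//	go func() { done <- blockingCall() }()
//	select {
//	case err := <-done:
//		// attempt finished; inspect err
//	case <-cancel:
//		// stop waiting; the goroutine completes in the background
//	}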
type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill, which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}
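// Illustrative sketch (hypothetical, not part of the original file): the
// default connectSSH is roughly equivalent to piping the check script into
// a remote shell:
//
//	ssh ubuntu@<host> /bin/bash < check-host-script.sh
//
// Like FinishBootstrap, connectSSH is a package-level variable so that tests
// can substitute a fake connection function.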
// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH tries connecting to all addresses returned by the instance,
// in parallel; the first successful connection wins. We ensure that
// private addresses are for the correct machine by checking for the
// presence of a file on the machine that contains the machine's nonce.
// The "checkHostScript" is a bash script that performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the Try is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
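// Illustrative sketch (hypothetical values, not part of the original file):
// the polling behaviour above is driven by config.SSHTimeoutOpts, whose
// fields are used as follows:
//
//	opts := config.SSHTimeoutOpts{
//		Timeout:        10 * time.Minute, // give up entirely after this long
//		RetryDelay:     5 * time.Second,  // hostChecker delay between attempts
//		AddressesDelay: 10 * time.Second, // how often to poll for new addresses
//	}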