github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/shell"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/utils/ssh"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (arch, series string, _ environs.BootstrapFinalizer, err error) {
	if result, series, finalizer, err := BootstrapInstance(ctx, env, args); err == nil {
		return *result.Hardware.Arch, series, finalizer, nil
	} else {
		return "", "", nil, err
	}
}
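
// For illustration: a provider can delegate its environs.Environ Bootstrap
// method to this function, along these lines (the environ type here is
// hypothetical, not part of this package):
//
//	func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (string, string, environs.BootstrapFinalizer, error) {
//		return common.Bootstrap(ctx, e, args)
//	}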

// BootstrapInstance creates a new instance with the series and architecture
// of its choice, constrained to those of the available tools, and
// returns the instance result, series, and a function that
// must be called to finalize the bootstrap process by transferring
// the tools and installing the initial Juju state server.
// This method is called by Bootstrap above, which implements environs.Bootstrap, but
// is also exported so that providers can manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	// First thing, ensure we have tools otherwise there's no point.
	series = config.PreferredSeries(env.Config())
	availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series})
	if err != nil {
		return nil, "", nil, err
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(args.Constraints, series)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	result, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:    args.Constraints,
		Tools:          availableTools,
		InstanceConfig: instanceConfig,
		Placement:      args.Placement,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
		icfg.InstanceId = result.Instance.Id()
		icfg.HardwareCharacteristics = result.Hardware
		if err := instancecfg.FinishInstanceConfig(icfg, env.Config()); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, result.Instance, icfg)
	}
	return result, series, finalize, nil
}

// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and carrying out the cloud-config.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, instanceConfig *instancecfg.InstanceConfig) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	// Each attempt to connect to an address must verify the machine is the
	// bootstrap machine by checking its nonce file exists and contains the
	// nonce in the InstanceConfig. This also blocks sshinit from proceeding
	// until cloud-init has completed, which is necessary to ensure apt
	// invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat $noncefile)
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
	addr, err := waitSSH(
		ctx,
		interrupted,
		client,
		checkNonceCommand,
		inst,
		instanceConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}
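
// For illustration: because FinishBootstrap is a package variable, tests can
// swap it out and restore it afterwards, roughly as follows (a sketch only,
// not taken from any particular test):
//
//	restore := common.FinishBootstrap
//	common.FinishBootstrap = func(environs.BootstrapContext, ssh.Client, instance.Instance, *instancecfg.InstanceConfig) error {
//		return nil
//	}
//	defer func() { common.FinishBootstrap = restore }()

// ConfigureMachine renders the cloud-init configuration for the instance's
// series into a shell script and runs it on the given host over SSH as the
// "ubuntu" user, completing the machine's Juju setup.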
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Set packaging update here
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}

type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is taken outside the goroutine that may outlive
	// hostChecker.loop, or we evoke the wrath of the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		go func() {
			done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
		}
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}
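
// parallelHostChecker runs a hostChecker for each address passed to
// UpdateAddresses, all on the same parallel.Try; the first checker whose
// script succeeds supplies the Try's result.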
type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}

// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH attempts to connect to all addresses returned by the
// instance in parallel; the first successful connection wins. We ensure
// that private addresses are for the correct machine by checking
// the presence of a file on the machine that contains the
// machine's nonce. The "checkHostScript" is a bash script
// that performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the Try is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}