github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/provider/common/bootstrap.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package common
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"path"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/juju/loggo"
    15  	"github.com/juju/utils"
    16  	"github.com/juju/utils/parallel"
    17  	"github.com/juju/utils/shell"
    18  
    19  	coreCloudinit "github.com/juju/juju/cloudinit"
    20  	"github.com/juju/juju/cloudinit/sshinit"
    21  	"github.com/juju/juju/environs"
    22  	"github.com/juju/juju/environs/bootstrap"
    23  	"github.com/juju/juju/environs/cloudinit"
    24  	"github.com/juju/juju/environs/config"
    25  	"github.com/juju/juju/instance"
    26  	"github.com/juju/juju/network"
    27  	coretools "github.com/juju/juju/tools"
    28  	"github.com/juju/juju/utils/ssh"
    29  )
    30  
// logger is the package-level logger; it is registered with loggo
// under the name "juju.provider.common".
var logger = loggo.GetLogger("juju.provider.common")
    32  
    33  // Bootstrap is a common implementation of the Bootstrap method defined on
    34  // environs.Environ; we strongly recommend that this implementation be used
    35  // when writing a new provider.
    36  func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams) (err error) {
    37  	// TODO make safe in the case of racing Bootstraps
    38  	// If two Bootstraps are called concurrently, there's
    39  	// no way to make sure that only one succeeds.
    40  
    41  	var inst instance.Instance
    42  	defer func() { handleBootstrapError(err, ctx, inst, env) }()
    43  
    44  	// First thing, ensure we have tools otherwise there's no point.
    45  	selectedTools, err := EnsureBootstrapTools(ctx, env, config.PreferredSeries(env.Config()), args.Constraints.Arch)
    46  	if err != nil {
    47  		return err
    48  	}
    49  
    50  	// Get the bootstrap SSH client. Do this early, so we know
    51  	// not to bother with any of the below if we can't finish the job.
    52  	client := ssh.DefaultClient
    53  	if client == nil {
    54  		// This should never happen: if we don't have OpenSSH, then
    55  		// go.crypto/ssh should be used with an auto-generated key.
    56  		return fmt.Errorf("no SSH client available")
    57  	}
    58  
    59  	privateKey, err := GenerateSystemSSHKey(env)
    60  	if err != nil {
    61  		return err
    62  	}
    63  	machineConfig := environs.NewBootstrapMachineConfig(privateKey)
    64  
    65  	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
    66  	inst, hw, _, err := env.StartInstance(environs.StartInstanceParams{
    67  		Constraints:   args.Constraints,
    68  		Tools:         selectedTools,
    69  		MachineConfig: machineConfig,
    70  		Placement:     args.Placement,
    71  	})
    72  	if err != nil {
    73  		return fmt.Errorf("cannot start bootstrap instance: %v", err)
    74  	}
    75  	fmt.Fprintf(ctx.GetStderr(), " - %s\n", inst.Id())
    76  	machineConfig.InstanceId = inst.Id()
    77  	machineConfig.HardwareCharacteristics = hw
    78  
    79  	err = bootstrap.SaveState(
    80  		env.Storage(),
    81  		&bootstrap.BootstrapState{
    82  			StateInstances: []instance.Id{inst.Id()},
    83  		})
    84  	if err != nil {
    85  		return fmt.Errorf("cannot save state: %v", err)
    86  	}
    87  	return FinishBootstrap(ctx, client, inst, machineConfig)
    88  }
    89  
    90  // GenerateSystemSSHKey creates a new key for the system identity. The
    91  // authorized_keys in the environment config is updated to include the public
    92  // key for the generated key.
    93  func GenerateSystemSSHKey(env environs.Environ) (privateKey string, err error) {
    94  	logger.Debugf("generate a system ssh key")
    95  	// Create a new system ssh key and add that to the authorized keys.
    96  	privateKey, publicKey, err := ssh.GenerateKey(config.JujuSystemKey)
    97  	if err != nil {
    98  		return "", fmt.Errorf("failed to create system key: %v", err)
    99  	}
   100  	authorized_keys := config.ConcatAuthKeys(env.Config().AuthorizedKeys(), publicKey)
   101  	newConfig, err := env.Config().Apply(map[string]interface{}{
   102  		config.AuthKeysConfig: authorized_keys,
   103  	})
   104  	if err != nil {
   105  		return "", fmt.Errorf("failed to create new config: %v", err)
   106  	}
   107  	if err = env.SetConfig(newConfig); err != nil {
   108  		return "", fmt.Errorf("failed to set new config: %v", err)
   109  	}
   110  	return privateKey, nil
   111  }
   112  
   113  // handleBootstrapError cleans up after a failed bootstrap.
   114  func handleBootstrapError(err error, ctx environs.BootstrapContext, inst instance.Instance, env environs.Environ) {
   115  	if err == nil {
   116  		return
   117  	}
   118  
   119  	logger.Errorf("bootstrap failed: %v", err)
   120  	ch := make(chan os.Signal, 1)
   121  	ctx.InterruptNotify(ch)
   122  	defer ctx.StopInterruptNotify(ch)
   123  	defer close(ch)
   124  	go func() {
   125  		for _ = range ch {
   126  			fmt.Fprintln(ctx.GetStderr(), "Cleaning up failed bootstrap")
   127  		}
   128  	}()
   129  
   130  	if inst != nil {
   131  		fmt.Fprintln(ctx.GetStderr(), "Stopping instance...")
   132  		if stoperr := env.StopInstances(inst.Id()); stoperr != nil {
   133  			logger.Errorf("cannot stop failed bootstrap instance %q: %v", inst.Id(), stoperr)
   134  		} else {
   135  			// set to nil so we know we can safely delete the state file
   136  			inst = nil
   137  		}
   138  	}
   139  	// We only delete the bootstrap state file if either we didn't
   140  	// start an instance, or we managed to cleanly stop it.
   141  	if inst == nil {
   142  		if rmerr := bootstrap.DeleteStateFile(env.Storage()); rmerr != nil {
   143  			logger.Errorf("cannot delete bootstrap state file: %v", rmerr)
   144  		}
   145  	}
   146  }
   147  
   148  // FinishBootstrap completes the bootstrap process by connecting
   149  // to the instance via SSH and carrying out the cloud-config.
   150  //
   151  // Note: FinishBootstrap is exposed so it can be replaced for testing.
   152  var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error {
   153  	interrupted := make(chan os.Signal, 1)
   154  	ctx.InterruptNotify(interrupted)
   155  	defer ctx.StopInterruptNotify(interrupted)
   156  	// Each attempt to connect to an address must verify the machine is the
   157  	// bootstrap machine by checking its nonce file exists and contains the
   158  	// nonce in the MachineConfig. This also blocks sshinit from proceeding
   159  	// until cloud-init has completed, which is necessary to ensure apt
   160  	// invocations don't trample each other.
   161  	nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile))
   162  	checkNonceCommand := fmt.Sprintf(`
   163  	noncefile=%s
   164  	if [ ! -e "$noncefile" ]; then
   165  		echo "$noncefile does not exist" >&2
   166  		exit 1
   167  	fi
   168  	content=$(cat $noncefile)
   169  	if [ "$content" != %s ]; then
   170  		echo "$noncefile contents do not match machine nonce" >&2
   171  		exit 1
   172  	fi
   173  	`, nonceFile, utils.ShQuote(machineConfig.MachineNonce))
   174  	addr, err := waitSSH(
   175  		ctx,
   176  		interrupted,
   177  		client,
   178  		checkNonceCommand,
   179  		inst,
   180  		machineConfig.Config.BootstrapSSHOpts(),
   181  	)
   182  	if err != nil {
   183  		return err
   184  	}
   185  	// Bootstrap is synchronous, and will spawn a subprocess
   186  	// to complete the procedure. If the user hits Ctrl-C,
   187  	// SIGINT is sent to the foreground process attached to
   188  	// the terminal, which will be the ssh subprocess at this
   189  	// point. For that reason, we do not call StopInterruptNotify
   190  	// until this function completes.
   191  	cloudcfg := coreCloudinit.New()
   192  	if err := cloudinit.ConfigureJuju(machineConfig, cloudcfg); err != nil {
   193  		return err
   194  	}
   195  	configScript, err := sshinit.ConfigureScript(cloudcfg)
   196  	if err != nil {
   197  		return err
   198  	}
   199  	script := shell.DumpFileOnErrorScript(machineConfig.CloudInitOutputLog) + configScript
   200  	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
   201  		Host:           "ubuntu@" + addr,
   202  		Client:         client,
   203  		Config:         cloudcfg,
   204  		ProgressWriter: ctx.GetStderr(),
   205  	})
   206  }
   207  
// addresser is the subset of instance behaviour that waitSSH relies on
// to discover the addresses at which an instance may be reached.
type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}
   217  
// hostChecker repeatedly attempts to run a check script over SSH
// on a single address until the script succeeds or the checker is
// told to stop; see hostChecker.loop.
type hostChecker struct {
	// addr is the address being checked.
	addr network.Address
	// client is the SSH client used to run the check script.
	client ssh.Client

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
   235  
   236  // Close implements io.Closer, as required by parallel.Try.
   237  func (*hostChecker) Close() error {
   238  	return nil
   239  }
   240  
   241  func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
   242  	// The value of connectSSH is taken outside the goroutine that may outlive
   243  	// hostChecker.loop, or we evoke the wrath of the race detector.
   244  	connectSSH := connectSSH
   245  	done := make(chan error, 1)
   246  	var lastErr error
   247  	for {
   248  		go func() {
   249  			done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
   250  		}()
   251  		select {
   252  		case <-hc.closed:
   253  			return hc, lastErr
   254  		case <-dying:
   255  			return hc, lastErr
   256  		case lastErr = <-done:
   257  			if lastErr == nil {
   258  				return hc, nil
   259  			}
   260  		}
   261  		select {
   262  		case <-hc.closed:
   263  		case <-dying:
   264  		case <-time.After(hc.checkDelay):
   265  		}
   266  	}
   267  }
   268  
// parallelHostChecker runs one hostChecker per known address on the
// embedded parallel.Try, so that all addresses are checked concurrently.
type parallelHostChecker struct {
	*parallel.Try
	// client is the SSH client handed to every hostChecker.
	client ssh.Client
	// stderr receives a progress message for each new address tried.
	stderr io.Writer

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
   287  
   288  func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
   289  	for _, addr := range addrs {
   290  		if _, ok := p.active[addr]; ok {
   291  			continue
   292  		}
   293  		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
   294  		closed := make(chan struct{})
   295  		hc := &hostChecker{
   296  			addr:            addr,
   297  			client:          p.client,
   298  			checkDelay:      p.checkDelay,
   299  			checkHostScript: p.checkHostScript,
   300  			closed:          closed,
   301  		}
   302  		p.active[addr] = closed
   303  		p.Start(hc.loop)
   304  	}
   305  }
   306  
   307  // Close prevents additional functions from being added to
   308  // the Try, and tells each active hostChecker to exit.
   309  func (p *parallelHostChecker) Close() error {
   310  	// We signal each checker to stop and wait for them
   311  	// each to complete; this allows us to get the error,
   312  	// as opposed to when using try.Kill which does not
   313  	// wait for the functions to complete.
   314  	p.Try.Close()
   315  	for _, ch := range p.active {
   316  		close(ch)
   317  	}
   318  	return nil
   319  }
   320  
   321  // connectSSH is called to connect to the specified host and
   322  // execute the "checkHostScript" bash script on it.
   323  var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
   324  	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
   325  	cmd.Stdin = strings.NewReader(checkHostScript)
   326  	output, err := cmd.CombinedOutput()
   327  	if err != nil && len(output) > 0 {
   328  		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
   329  	}
   330  	return err
   331  }
   332  
   333  // waitSSH waits for the instance to be assigned a routable
   334  // address, then waits until we can connect to it via SSH.
   335  //
   336  // waitSSH attempts on all addresses returned by the instance
   337  // in parallel; the first succeeding one wins. We ensure that
   338  // private addresses are for the correct machine by checking
   339  // the presence of a file on the machine that contains the
   340  // machine's nonce. The "checkHostScript" is a bash script
   341  // that performs this file check.
   342  func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
   343  	globalTimeout := time.After(timeout.Timeout)
   344  	pollAddresses := time.NewTimer(0)
   345  
   346  	// checker checks each address in a loop, in parallel,
   347  	// until one succeeds, the global timeout is reached,
   348  	// or the tomb is killed.
   349  	checker := parallelHostChecker{
   350  		Try:             parallel.NewTry(0, nil),
   351  		client:          client,
   352  		stderr:          ctx.GetStderr(),
   353  		active:          make(map[network.Address]chan struct{}),
   354  		checkDelay:      timeout.RetryDelay,
   355  		checkHostScript: checkHostScript,
   356  	}
   357  	defer checker.Kill()
   358  
   359  	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
   360  	for {
   361  		select {
   362  		case <-pollAddresses.C:
   363  			pollAddresses.Reset(timeout.AddressesDelay)
   364  			if err := inst.Refresh(); err != nil {
   365  				return "", fmt.Errorf("refreshing addresses: %v", err)
   366  			}
   367  			addresses, err := inst.Addresses()
   368  			if err != nil {
   369  				return "", fmt.Errorf("getting addresses: %v", err)
   370  			}
   371  			checker.UpdateAddresses(addresses)
   372  		case <-globalTimeout:
   373  			checker.Close()
   374  			lastErr := checker.Wait()
   375  			format := "waited for %v "
   376  			args := []interface{}{timeout.Timeout}
   377  			if len(checker.active) == 0 {
   378  				format += "without getting any addresses"
   379  			} else {
   380  				format += "without being able to connect"
   381  			}
   382  			if lastErr != nil && lastErr != parallel.ErrStopped {
   383  				format += ": %v"
   384  				args = append(args, lastErr)
   385  			}
   386  			return "", fmt.Errorf(format, args...)
   387  		case <-interrupted:
   388  			return "", fmt.Errorf("interrupted")
   389  		case <-checker.Dead():
   390  			result, err := checker.Result()
   391  			if err != nil {
   392  				return "", err
   393  			}
   394  			return result.(*hostChecker).addr.Value, nil
   395  		}
   396  	}
   397  }
   398  
   399  // EnsureBootstrapTools finds tools, syncing with an external tools source as
   400  // necessary; it then selects the newest tools to bootstrap with, and sets
   401  // agent-version.
   402  func EnsureBootstrapTools(ctx environs.BootstrapContext, env environs.Environ, series string, arch *string) (coretools.List, error) {
   403  	possibleTools, err := bootstrap.EnsureToolsAvailability(ctx, env, series, arch)
   404  	if err != nil {
   405  		return nil, err
   406  	}
   407  	return bootstrap.SetBootstrapTools(env, possibleTools)
   408  }