github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/provider/common/bootstrap.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package common
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"path"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/juju/loggo"
    15  
    16  	coreCloudinit "launchpad.net/juju-core/cloudinit"
    17  	"launchpad.net/juju-core/cloudinit/sshinit"
    18  	"launchpad.net/juju-core/constraints"
    19  	"launchpad.net/juju-core/environs"
    20  	"launchpad.net/juju-core/environs/bootstrap"
    21  	"launchpad.net/juju-core/environs/cloudinit"
    22  	"launchpad.net/juju-core/environs/config"
    23  	"launchpad.net/juju-core/instance"
    24  	coretools "launchpad.net/juju-core/tools"
    25  	"launchpad.net/juju-core/utils"
    26  	"launchpad.net/juju-core/utils/parallel"
    27  	"launchpad.net/juju-core/utils/ssh"
    28  )
    29  
// logger is the package-level loggo logger, registered under the
// name "juju.provider.common".
var logger = loggo.GetLogger("juju.provider.common")
    31  
    32  // Bootstrap is a common implementation of the Bootstrap method defined on
    33  // environs.Environ; we strongly recommend that this implementation be used
    34  // when writing a new provider.
    35  func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, cons constraints.Value) (err error) {
    36  	// TODO make safe in the case of racing Bootstraps
    37  	// If two Bootstraps are called concurrently, there's
    38  	// no way to make sure that only one succeeds.
    39  
    40  	var inst instance.Instance
    41  	defer func() { handleBootstrapError(err, ctx, inst, env) }()
    42  
    43  	// Get the bootstrap SSH client. Do this early, so we know
    44  	// not to bother with any of the below if we can't finish the job.
    45  	client := ssh.DefaultClient
    46  	if client == nil {
    47  		// This should never happen: if we don't have OpenSSH, then
    48  		// go.crypto/ssh should be used with an auto-generated key.
    49  		return fmt.Errorf("no SSH client available")
    50  	}
    51  
    52  	// Create an empty bootstrap state file so we can get its URL.
    53  	// It will be updated with the instance id and hardware characteristics
    54  	// after the bootstrap instance is started.
    55  	stateFileURL, err := bootstrap.CreateStateFile(env.Storage())
    56  	if err != nil {
    57  		return err
    58  	}
    59  
    60  	privateKey, err := GenerateSystemSSHKey(env)
    61  	if err != nil {
    62  		return err
    63  	}
    64  	machineConfig := environs.NewBootstrapMachineConfig(stateFileURL, privateKey)
    65  
    66  	selectedTools, err := EnsureBootstrapTools(env, env.Config().DefaultSeries(), cons.Arch)
    67  	if err != nil {
    68  		return err
    69  	}
    70  
    71  	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
    72  	inst, hw, err := env.StartInstance(cons, selectedTools, machineConfig)
    73  	if err != nil {
    74  		return fmt.Errorf("cannot start bootstrap instance: %v", err)
    75  	}
    76  	fmt.Fprintf(ctx.GetStderr(), " - %s\n", inst.Id())
    77  
    78  	var characteristics []instance.HardwareCharacteristics
    79  	if hw != nil {
    80  		characteristics = []instance.HardwareCharacteristics{*hw}
    81  	}
    82  	err = bootstrap.SaveState(
    83  		env.Storage(),
    84  		&bootstrap.BootstrapState{
    85  			StateInstances:  []instance.Id{inst.Id()},
    86  			Characteristics: characteristics,
    87  		})
    88  	if err != nil {
    89  		return fmt.Errorf("cannot save state: %v", err)
    90  	}
    91  	return FinishBootstrap(ctx, client, inst, machineConfig)
    92  }
    93  
    94  // GenerateSystemSSHKey creates a new key for the system identity. The
    95  // authorized_keys in the environment config is updated to include the public
    96  // key for the generated key.
    97  func GenerateSystemSSHKey(env environs.Environ) (privateKey string, err error) {
    98  	logger.Debugf("generate a system ssh key")
    99  	// Create a new system ssh key and add that to the authorized keys.
   100  	privateKey, publicKey, err := ssh.GenerateKey(config.JujuSystemKey)
   101  	if err != nil {
   102  		return "", fmt.Errorf("failed to create system key: %v", err)
   103  	}
   104  	authorized_keys := config.ConcatAuthKeys(env.Config().AuthorizedKeys(), publicKey)
   105  	newConfig, err := env.Config().Apply(map[string]interface{}{
   106  		config.AuthKeysConfig: authorized_keys,
   107  	})
   108  	if err != nil {
   109  		return "", fmt.Errorf("failed to create new config: %v", err)
   110  	}
   111  	if err = env.SetConfig(newConfig); err != nil {
   112  		return "", fmt.Errorf("failed to set new config: %v", err)
   113  	}
   114  	return privateKey, nil
   115  }
   116  
   117  // handelBootstrapError cleans up after a failed bootstrap.
   118  func handleBootstrapError(err error, ctx environs.BootstrapContext, inst instance.Instance, env environs.Environ) {
   119  	if err == nil {
   120  		return
   121  	}
   122  
   123  	logger.Errorf("bootstrap failed: %v", err)
   124  	ch := make(chan os.Signal, 1)
   125  	ctx.InterruptNotify(ch)
   126  	defer ctx.StopInterruptNotify(ch)
   127  	defer close(ch)
   128  	go func() {
   129  		for _ = range ch {
   130  			fmt.Fprintln(ctx.GetStderr(), "Cleaning up failed bootstrap")
   131  		}
   132  	}()
   133  
   134  	if inst != nil {
   135  		fmt.Fprintln(ctx.GetStderr(), "Stopping instance...")
   136  		if stoperr := env.StopInstances([]instance.Instance{inst}); stoperr != nil {
   137  			logger.Errorf("cannot stop failed bootstrap instance %q: %v", inst.Id(), stoperr)
   138  		} else {
   139  			// set to nil so we know we can safely delete the state file
   140  			inst = nil
   141  		}
   142  	}
   143  	// We only delete the bootstrap state file if either we didn't
   144  	// start an instance, or we managed to cleanly stop it.
   145  	if inst == nil {
   146  		if rmerr := bootstrap.DeleteStateFile(env.Storage()); rmerr != nil {
   147  			logger.Errorf("cannot delete bootstrap state file: %v", rmerr)
   148  		}
   149  	}
   150  }
   151  
   152  // FinishBootstrap completes the bootstrap process by connecting
   153  // to the instance via SSH and carrying out the cloud-config.
   154  //
   155  // Note: FinishBootstrap is exposed so it can be replaced for testing.
   156  var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, machineConfig *cloudinit.MachineConfig) error {
   157  	interrupted := make(chan os.Signal, 1)
   158  	ctx.InterruptNotify(interrupted)
   159  	defer ctx.StopInterruptNotify(interrupted)
   160  	// Each attempt to connect to an address must verify the machine is the
   161  	// bootstrap machine by checking its nonce file exists and contains the
   162  	// nonce in the MachineConfig. This also blocks sshinit from proceeding
   163  	// until cloud-init has completed, which is necessary to ensure apt
   164  	// invocations don't trample each other.
   165  	nonceFile := utils.ShQuote(path.Join(machineConfig.DataDir, cloudinit.NonceFile))
   166  	checkNonceCommand := fmt.Sprintf(`
   167  	noncefile=%s
   168  	if [ ! -e "$noncefile" ]; then
   169  		echo "$noncefile does not exist" >&2
   170  		exit 1
   171  	fi
   172  	content=$(cat $noncefile)
   173  	if [ "$content" != %s ]; then
   174  		echo "$noncefile contents do not match machine nonce" >&2
   175  		exit 1
   176  	fi
   177  	`, nonceFile, utils.ShQuote(machineConfig.MachineNonce))
   178  	addr, err := waitSSH(
   179  		ctx,
   180  		interrupted,
   181  		client,
   182  		checkNonceCommand,
   183  		inst,
   184  		machineConfig.Config.BootstrapSSHOpts(),
   185  	)
   186  	if err != nil {
   187  		return err
   188  	}
   189  	// Bootstrap is synchronous, and will spawn a subprocess
   190  	// to complete the procedure. If the user hits Ctrl-C,
   191  	// SIGINT is sent to the foreground process attached to
   192  	// the terminal, which will be the ssh subprocess at this
   193  	// point. For that reason, we do not call StopInterruptNotify
   194  	// until this function completes.
   195  	cloudcfg := coreCloudinit.New()
   196  	if err := cloudinit.ConfigureJuju(machineConfig, cloudcfg); err != nil {
   197  		return err
   198  	}
   199  	return sshinit.Configure(sshinit.ConfigureParams{
   200  		Host:           "ubuntu@" + addr,
   201  		Client:         client,
   202  		Config:         cloudcfg,
   203  		ProgressWriter: ctx.GetStderr(),
   204  	})
   205  }
   206  
// addresser is the minimal view of an instance that waitSSH needs: a way
// to refresh the instance's address information and a way to read it.
type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]instance.Address, error)
}
   216  
// hostChecker repeatedly tries to run a check script over SSH on a single
// address; see hostChecker.loop.
type hostChecker struct {
	// addr is the address to connect to; its Value is used as the
	// SSH host.
	addr   instance.Address
	// client is the SSH client used for each connection attempt.
	client ssh.Client

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
   234  
// Close implements io.Closer, as required by parallel.Try.
// It is a no-op and always returns nil.
func (*hostChecker) Close() error {
	return nil
}
   239  
   240  func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
   241  	// The value of connectSSH is taken outside the goroutine that may outlive
   242  	// hostChecker.loop, or we evoke the wrath of the race detector.
   243  	connectSSH := connectSSH
   244  	done := make(chan error, 1)
   245  	var lastErr error
   246  	for {
   247  		go func() {
   248  			done <- connectSSH(hc.client, hc.addr.Value, hc.checkHostScript)
   249  		}()
   250  		select {
   251  		case <-hc.closed:
   252  			return hc, lastErr
   253  		case <-dying:
   254  			return hc, lastErr
   255  		case lastErr = <-done:
   256  			if lastErr == nil {
   257  				return hc, nil
   258  			}
   259  		}
   260  		select {
   261  		case <-hc.closed:
   262  		case <-dying:
   263  		case <-time.After(hc.checkDelay):
   264  		}
   265  	}
   266  }
   267  
// parallelHostChecker runs one hostChecker per address on top of an
// embedded parallel.Try.
type parallelHostChecker struct {
	// Try runs the hostChecker loops started by UpdateAddresses.
	*parallel.Try
	// client is the SSH client handed to every hostChecker.
	client ssh.Client
	// stderr receives a progress line for each new address tried.
	stderr io.Writer

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[instance.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
   286  
   287  func (p *parallelHostChecker) UpdateAddresses(addrs []instance.Address) {
   288  	for _, addr := range addrs {
   289  		if _, ok := p.active[addr]; ok {
   290  			continue
   291  		}
   292  		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
   293  		closed := make(chan struct{})
   294  		hc := &hostChecker{
   295  			addr:            addr,
   296  			client:          p.client,
   297  			checkDelay:      p.checkDelay,
   298  			checkHostScript: p.checkHostScript,
   299  			closed:          closed,
   300  		}
   301  		p.active[addr] = closed
   302  		p.Start(hc.loop)
   303  	}
   304  }
   305  
   306  // Close prevents additional functions from being added to
   307  // the Try, and tells each active hostChecker to exit.
   308  func (p *parallelHostChecker) Close() error {
   309  	// We signal each checker to stop and wait for them
   310  	// each to complete; this allows us to get the error,
   311  	// as opposed to when using try.Kill which does not
   312  	// wait for the functions to complete.
   313  	p.Try.Close()
   314  	for _, ch := range p.active {
   315  		close(ch)
   316  	}
   317  	return nil
   318  }
   319  
   320  // connectSSH is called to connect to the specified host and
   321  // execute the "checkHostScript" bash script on it.
   322  var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
   323  	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
   324  	cmd.Stdin = strings.NewReader(checkHostScript)
   325  	output, err := cmd.CombinedOutput()
   326  	if err != nil && len(output) > 0 {
   327  		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
   328  	}
   329  	return err
   330  }
   331  
   332  // waitSSH waits for the instance to be assigned a routable
   333  // address, then waits until we can connect to it via SSH.
   334  //
   335  // waitSSH attempts on all addresses returned by the instance
   336  // in parallel; the first succeeding one wins. We ensure that
   337  // private addresses are for the correct machine by checking
   338  // the presence of a file on the machine that contains the
   339  // machine's nonce. The "checkHostScript" is a bash script
   340  // that performs this file check.
   341  func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
   342  	globalTimeout := time.After(timeout.Timeout)
   343  	pollAddresses := time.NewTimer(0)
   344  
   345  	// checker checks each address in a loop, in parallel,
   346  	// until one succeeds, the global timeout is reached,
   347  	// or the tomb is killed.
   348  	checker := parallelHostChecker{
   349  		Try:             parallel.NewTry(0, nil),
   350  		client:          client,
   351  		stderr:          ctx.GetStderr(),
   352  		active:          make(map[instance.Address]chan struct{}),
   353  		checkDelay:      timeout.RetryDelay,
   354  		checkHostScript: checkHostScript,
   355  	}
   356  	defer checker.Kill()
   357  
   358  	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
   359  	for {
   360  		select {
   361  		case <-pollAddresses.C:
   362  			pollAddresses.Reset(timeout.AddressesDelay)
   363  			if err := inst.Refresh(); err != nil {
   364  				return "", fmt.Errorf("refreshing addresses: %v", err)
   365  			}
   366  			addresses, err := inst.Addresses()
   367  			if err != nil {
   368  				return "", fmt.Errorf("getting addresses: %v", err)
   369  			}
   370  			checker.UpdateAddresses(addresses)
   371  		case <-globalTimeout:
   372  			checker.Close()
   373  			lastErr := checker.Wait()
   374  			format := "waited for %v "
   375  			args := []interface{}{timeout.Timeout}
   376  			if len(checker.active) == 0 {
   377  				format += "without getting any addresses"
   378  			} else {
   379  				format += "without being able to connect"
   380  			}
   381  			if lastErr != nil && lastErr != parallel.ErrStopped {
   382  				format += ": %v"
   383  				args = append(args, lastErr)
   384  			}
   385  			return "", fmt.Errorf(format, args...)
   386  		case <-interrupted:
   387  			return "", fmt.Errorf("interrupted")
   388  		case <-checker.Dead():
   389  			result, err := checker.Result()
   390  			if err != nil {
   391  				return "", err
   392  			}
   393  			return result.(*hostChecker).addr.Value, nil
   394  		}
   395  	}
   396  }
   397  
   398  // EnsureBootstrapTools finds tools, syncing with an external tools source as
   399  // necessary; it then selects the newest tools to bootstrap with, and sets
   400  // agent-version.
   401  func EnsureBootstrapTools(env environs.Environ, series string, arch *string) (coretools.List, error) {
   402  	possibleTools, err := bootstrap.EnsureToolsAvailability(env, series, arch)
   403  	if err != nil {
   404  		return nil, err
   405  	}
   406  	return bootstrap.SetBootstrapTools(env, possibleTools)
   407  }
   408  
   409  // EnsureNotBootstrapped returns null if the environment is not bootstrapped,
   410  // and an error if it is or if the function was not able to tell.
   411  func EnsureNotBootstrapped(env environs.Environ) error {
   412  	_, err := bootstrap.LoadState(env.Storage())
   413  	// If there is no error loading the bootstrap state, then we are
   414  	// bootstrapped.
   415  	if err == nil {
   416  		return fmt.Errorf("environment is already bootstrapped")
   417  	}
   418  	if err == environs.ErrNotBootstrapped {
   419  		return nil
   420  	}
   421  	return err
   422  }