github.com/mwhudson/juju@v0.0.0-20160512215208-90ff01f3497f/provider/common/bootstrap.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package common
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"path"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/juju/errors"
    16  	"github.com/juju/loggo"
    17  	"github.com/juju/utils"
    18  	"github.com/juju/utils/parallel"
    19  	"github.com/juju/utils/series"
    20  	"github.com/juju/utils/shell"
    21  	"github.com/juju/utils/ssh"
    22  
    23  	"github.com/juju/juju/agent"
    24  	"github.com/juju/juju/cloudconfig"
    25  	"github.com/juju/juju/cloudconfig/cloudinit"
    26  	"github.com/juju/juju/cloudconfig/instancecfg"
    27  	"github.com/juju/juju/cloudconfig/sshinit"
    28  	"github.com/juju/juju/environs"
    29  	"github.com/juju/juju/environs/config"
    30  	"github.com/juju/juju/environs/imagemetadata"
    31  	"github.com/juju/juju/environs/simplestreams"
    32  	"github.com/juju/juju/instance"
    33  	"github.com/juju/juju/network"
    34  	"github.com/juju/juju/status"
    35  	coretools "github.com/juju/juju/tools"
    36  )
    37  
// logger is the package-level logger, registered under the name
// "juju.provider.common".
var logger = loggo.GetLogger("juju.provider.common")
    39  
    40  // Bootstrap is a common implementation of the Bootstrap method defined on
    41  // environs.Environ; we strongly recommend that this implementation be used
    42  // when writing a new provider.
    43  func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
    44  ) (*environs.BootstrapResult, error) {
    45  	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
    46  	if err != nil {
    47  		return nil, errors.Trace(err)
    48  	}
    49  
    50  	bsResult := &environs.BootstrapResult{
    51  		Arch:     *result.Hardware.Arch,
    52  		Series:   series,
    53  		Finalize: finalizer,
    54  	}
    55  	return bsResult, nil
    56  }
    57  
    58  // BootstrapInstance creates a new instance with the series and architecture
    59  // of its choice, constrained to those of the available tools, and
    60  // returns the instance result, series, and a function that
    61  // must be called to finalize the bootstrap process by transferring
    62  // the tools and installing the initial Juju controller.
    63  // This method is called by Bootstrap above, which implements environs.Bootstrap, but
    64  // is also exported so that providers can manipulate the started instance.
    65  func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
    66  ) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) {
    67  	// TODO make safe in the case of racing Bootstraps
    68  	// If two Bootstraps are called concurrently, there's
    69  	// no way to make sure that only one succeeds.
    70  
    71  	// First thing, ensure we have tools otherwise there's no point.
    72  	if args.BootstrapSeries != "" {
    73  		selectedSeries = args.BootstrapSeries
    74  	} else {
    75  		selectedSeries = config.PreferredSeries(env.Config())
    76  	}
    77  	availableTools, err := args.AvailableTools.Match(coretools.Filter{
    78  		Series: selectedSeries,
    79  	})
    80  	if err != nil {
    81  		return nil, "", nil, err
    82  	}
    83  
    84  	// Filter image metadata to the selected series.
    85  	var imageMetadata []*imagemetadata.ImageMetadata
    86  	seriesVersion, err := series.SeriesVersion(selectedSeries)
    87  	if err != nil {
    88  		return nil, "", nil, errors.Trace(err)
    89  	}
    90  	for _, m := range args.ImageMetadata {
    91  		if m.Version != seriesVersion {
    92  			continue
    93  		}
    94  		imageMetadata = append(imageMetadata, m)
    95  	}
    96  
    97  	// Get the bootstrap SSH client. Do this early, so we know
    98  	// not to bother with any of the below if we can't finish the job.
    99  	client := ssh.DefaultClient
   100  	if client == nil {
   101  		// This should never happen: if we don't have OpenSSH, then
   102  		// go.crypto/ssh should be used with an auto-generated key.
   103  		return nil, "", nil, fmt.Errorf("no SSH client available")
   104  	}
   105  
   106  	publicKey, err := simplestreams.UserPublicSigningKey()
   107  	if err != nil {
   108  		return nil, "", nil, err
   109  	}
   110  	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(
   111  		args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey,
   112  	)
   113  	if err != nil {
   114  		return nil, "", nil, err
   115  	}
   116  	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
   117  	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
   118  	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
   119  	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
   120  		// If we need to override the default bridge name, do it now. When
   121  		// args.ContainerBridgeName is empty, the default names for LXC
   122  		// (lxcbr0) and KVM (virbr0) will be used.
   123  		if args.ContainerBridgeName != "" {
   124  			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
   125  			if icfg.AgentEnvironment == nil {
   126  				icfg.AgentEnvironment = make(map[string]string)
   127  			}
   128  			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
   129  		}
   130  	}
   131  	maybeSetBridge(instanceConfig)
   132  
   133  	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
   134  	instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error {
   135  		fmt.Fprintf(ctx.GetStderr(), "%s      \r", info)
   136  		return nil
   137  	}
   138  	result, err := env.StartInstance(environs.StartInstanceParams{
   139  		Constraints:    args.BootstrapConstraints,
   140  		Tools:          availableTools,
   141  		InstanceConfig: instanceConfig,
   142  		Placement:      args.Placement,
   143  		ImageMetadata:  imageMetadata,
   144  		StatusCallback: instanceStatus,
   145  	})
   146  	if err != nil {
   147  		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
   148  	}
   149  	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())
   150  
   151  	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
   152  		icfg.InstanceId = result.Instance.Id()
   153  		icfg.HardwareCharacteristics = result.Hardware
   154  		envConfig := env.Config()
   155  		if result.Config != nil {
   156  			updated, err := envConfig.Apply(result.Config.UnknownAttrs())
   157  			if err != nil {
   158  				return errors.Trace(err)
   159  			}
   160  			envConfig = updated
   161  		}
   162  		if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil {
   163  			return err
   164  		}
   165  		maybeSetBridge(icfg)
   166  		return FinishBootstrap(ctx, client, env, result.Instance, icfg)
   167  	}
   168  	return result, selectedSeries, finalize, nil
   169  }
   170  
   171  // FinishBootstrap completes the bootstrap process by connecting
   172  // to the instance via SSH and carrying out the cloud-config.
   173  //
   174  // Note: FinishBootstrap is exposed so it can be replaced for testing.
   175  var FinishBootstrap = func(
   176  	ctx environs.BootstrapContext,
   177  	client ssh.Client,
   178  	env environs.Environ,
   179  	inst instance.Instance,
   180  	instanceConfig *instancecfg.InstanceConfig,
   181  ) error {
   182  	interrupted := make(chan os.Signal, 1)
   183  	ctx.InterruptNotify(interrupted)
   184  	defer ctx.StopInterruptNotify(interrupted)
   185  	addr, err := WaitSSH(
   186  		ctx.GetStderr(),
   187  		interrupted,
   188  		client,
   189  		GetCheckNonceCommand(instanceConfig),
   190  		&RefreshableInstance{inst, env},
   191  		instanceConfig.Config.BootstrapSSHOpts(),
   192  	)
   193  	if err != nil {
   194  		return err
   195  	}
   196  	return ConfigureMachine(ctx, client, addr, instanceConfig)
   197  }
   198  
   199  func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string {
   200  	// Each attempt to connect to an address must verify the machine is the
   201  	// bootstrap machine by checking its nonce file exists and contains the
   202  	// nonce in the InstanceConfig. This also blocks sshinit from proceeding
   203  	// until cloud-init has completed, which is necessary to ensure apt
   204  	// invocations don't trample each other.
   205  	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
   206  	checkNonceCommand := fmt.Sprintf(`
   207  	noncefile=%s
   208  	if [ ! -e "$noncefile" ]; then
   209  		echo "$noncefile does not exist" >&2
   210  		exit 1
   211  	fi
   212  	content=$(cat $noncefile)
   213  	if [ "$content" != %s ]; then
   214  		echo "$noncefile contents do not match machine nonce" >&2
   215  		exit 1
   216  	fi
   217  	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
   218  	return checkNonceCommand
   219  }
   220  
   221  func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
   222  	// Bootstrap is synchronous, and will spawn a subprocess
   223  	// to complete the procedure. If the user hits Ctrl-C,
   224  	// SIGINT is sent to the foreground process attached to
   225  	// the terminal, which will be the ssh subprocess at this
   226  	// point. For that reason, we do not call StopInterruptNotify
   227  	// until this function completes.
   228  	cloudcfg, err := cloudinit.New(instanceConfig.Series)
   229  	if err != nil {
   230  		return errors.Trace(err)
   231  	}
   232  
   233  	// Set packaging update here
   234  	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
   235  	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)
   236  
   237  	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	if err := udata.ConfigureJuju(); err != nil {
   242  		return err
   243  	}
   244  	configScript, err := cloudcfg.RenderScript()
   245  	if err != nil {
   246  		return err
   247  	}
   248  	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
   249  	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
   250  		Host:           "ubuntu@" + host,
   251  		Client:         client,
   252  		Config:         cloudcfg,
   253  		ProgressWriter: ctx.GetStderr(),
   254  		Series:         instanceConfig.Series,
   255  	})
   256  }
   257  
// Addresser is implemented by values whose network addresses can be
// refreshed and then read back. WaitSSH polls an Addresser to discover
// the addresses it should try to connect to.
type Addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}
   267  
// RefreshableInstance pairs an instance.Instance with the environs.Environ
// it is looked up in, so that the embedded instance can be replaced with a
// freshly fetched one (see Refresh).
type RefreshableInstance struct {
	// Instance is the most recently fetched instance; its methods are
	// promoted onto RefreshableInstance.
	instance.Instance
	// Env is the environment queried when refreshing the instance.
	Env environs.Environ
}
   272  
   273  // Refresh refreshes the addresses for the instance.
   274  func (i *RefreshableInstance) Refresh() error {
   275  	instances, err := i.Env.Instances([]instance.Id{i.Id()})
   276  	if err != nil {
   277  		return errors.Trace(err)
   278  	}
   279  	i.Instance = instances[0]
   280  	return nil
   281  }
   282  
// hostChecker holds the state needed to repeatedly check one address over
// SSH; the checking itself is done by its loop method.
type hostChecker struct {
	// addr is the address to check, client is the SSH client used to
	// reach it, and wg is the wait group on which loop calls Done when
	// it returns.
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
   301  
// Close implements io.Closer, as required by parallel.Try.
// It does nothing and always returns nil.
func (*hostChecker) Close() error {
	return nil
}
   306  
// loop runs hc.checkHostScript against hc.addr via connectSSH, pausing for
// hc.checkDelay between attempts, until an attempt succeeds, dying is
// signalled, or hc.closed is closed.
//
// It always returns hc itself as the io.Closer. The error is nil after a
// successful attempt; otherwise it is the error of the most recently
// completed attempt (nil if no attempt has completed yet). hc.wg.Done is
// called when loop returns.
func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is taken outside the goroutine that may outlive
	// hostChecker.loop, or we evoke the wrath of the race detector.
	connectSSH := connectSSH
	// done has room for one result, so an attempt that is still in flight
	// when loop returns can deliver its result without blocking.
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		// Wait for this attempt to finish, unless told to die first.
		select {
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		// The attempt failed: pause before retrying, unless asked to stop.
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			// No return here: control goes back to the top of the loop,
			// which starts another attempt before dying is looked at again.
		case <-time.After(hc.checkDelay):
		}
	}
}
   336  
// parallelHostChecker runs one hostChecker per address on an embedded
// parallel.Try, so that several addresses are checked concurrently.
type parallelHostChecker struct {
	*parallel.Try
	// client is the SSH client handed to every hostChecker, stderr is
	// where connection attempts are announced, and wg counts the
	// hostChecker loops that have been registered and not yet returned.
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
   356  
   357  func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
   358  	for _, addr := range addrs {
   359  		if _, ok := p.active[addr]; ok {
   360  			continue
   361  		}
   362  		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
   363  		closed := make(chan struct{})
   364  		hc := &hostChecker{
   365  			addr:            addr,
   366  			client:          p.client,
   367  			checkDelay:      p.checkDelay,
   368  			checkHostScript: p.checkHostScript,
   369  			closed:          closed,
   370  			wg:              &p.wg,
   371  		}
   372  		p.wg.Add(1)
   373  		p.active[addr] = closed
   374  		p.Start(hc.loop)
   375  	}
   376  }
   377  
   378  // Close prevents additional functions from being added to
   379  // the Try, and tells each active hostChecker to exit.
   380  func (p *parallelHostChecker) Close() error {
   381  	// We signal each checker to stop and wait for them
   382  	// each to complete; this allows us to get the error,
   383  	// as opposed to when using try.Kill which does not
   384  	// wait for the functions to complete.
   385  	p.Try.Close()
   386  	for _, ch := range p.active {
   387  		close(ch)
   388  	}
   389  	return nil
   390  }
   391  
   392  // connectSSH is called to connect to the specified host and
   393  // execute the "checkHostScript" bash script on it.
   394  var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
   395  	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
   396  	cmd.Stdin = strings.NewReader(checkHostScript)
   397  	output, err := cmd.CombinedOutput()
   398  	if err != nil && len(output) > 0 {
   399  		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
   400  	}
   401  	return err
   402  }
   403  
   404  // WaitSSH waits for the instance to be assigned a routable
   405  // address, then waits until we can connect to it via SSH.
   406  //
   407  // waitSSH attempts on all addresses returned by the instance
   408  // in parallel; the first succeeding one wins. We ensure that
   409  // private addresses are for the correct machine by checking
   410  // the presence of a file on the machine that contains the
   411  // machine's nonce. The "checkHostScript" is a bash script
   412  // that performs this file check.
   413  func WaitSSH(stdErr io.Writer, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst Addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
   414  	globalTimeout := time.After(timeout.Timeout)
   415  	pollAddresses := time.NewTimer(0)
   416  
   417  	// checker checks each address in a loop, in parallel,
   418  	// until one succeeds, the global timeout is reached,
   419  	// or the tomb is killed.
   420  	checker := parallelHostChecker{
   421  		Try:             parallel.NewTry(0, nil),
   422  		client:          client,
   423  		stderr:          stdErr,
   424  		active:          make(map[network.Address]chan struct{}),
   425  		checkDelay:      timeout.RetryDelay,
   426  		checkHostScript: checkHostScript,
   427  	}
   428  	defer checker.wg.Wait()
   429  	defer checker.Kill()
   430  
   431  	fmt.Fprintln(stdErr, "Waiting for address")
   432  	for {
   433  		select {
   434  		case <-pollAddresses.C:
   435  			pollAddresses.Reset(timeout.AddressesDelay)
   436  			if err := inst.Refresh(); err != nil {
   437  				return "", fmt.Errorf("refreshing addresses: %v", err)
   438  			}
   439  			addresses, err := inst.Addresses()
   440  			if err != nil {
   441  				return "", fmt.Errorf("getting addresses: %v", err)
   442  			}
   443  			checker.UpdateAddresses(addresses)
   444  		case <-globalTimeout:
   445  			checker.Close()
   446  			lastErr := checker.Wait()
   447  			format := "waited for %v "
   448  			args := []interface{}{timeout.Timeout}
   449  			if len(checker.active) == 0 {
   450  				format += "without getting any addresses"
   451  			} else {
   452  				format += "without being able to connect"
   453  			}
   454  			if lastErr != nil && lastErr != parallel.ErrStopped {
   455  				format += ": %v"
   456  				args = append(args, lastErr)
   457  			}
   458  			return "", fmt.Errorf(format, args...)
   459  		case <-interrupted:
   460  			return "", fmt.Errorf("interrupted")
   461  		case <-checker.Dead():
   462  			result, err := checker.Result()
   463  			if err != nil {
   464  				return "", err
   465  			}
   466  			return result.(*hostChecker).addr.Value, nil
   467  		}
   468  	}
   469  }