github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/provider/common/bootstrap.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package common
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"path"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/juju/errors"
    16  	"github.com/juju/loggo"
    17  	"github.com/juju/utils"
    18  	"github.com/juju/utils/parallel"
    19  	"github.com/juju/utils/series"
    20  	"github.com/juju/utils/shell"
    21  	"github.com/juju/utils/ssh"
    22  
    23  	"github.com/juju/juju/agent"
    24  	"github.com/juju/juju/cloudconfig"
    25  	"github.com/juju/juju/cloudconfig/cloudinit"
    26  	"github.com/juju/juju/cloudconfig/instancecfg"
    27  	"github.com/juju/juju/cloudconfig/sshinit"
    28  	"github.com/juju/juju/environs"
    29  	"github.com/juju/juju/environs/config"
    30  	"github.com/juju/juju/environs/imagemetadata"
    31  	"github.com/juju/juju/environs/simplestreams"
    32  	"github.com/juju/juju/instance"
    33  	"github.com/juju/juju/network"
    34  	"github.com/juju/juju/status"
    35  	coretools "github.com/juju/juju/tools"
    36  )
    37  
// logger is the package-level logger; it is obtained from loggo under
// the name "juju.provider.common".
var logger = loggo.GetLogger("juju.provider.common")
    39  
    40  // Bootstrap is a common implementation of the Bootstrap method defined on
    41  // environs.Environ; we strongly recommend that this implementation be used
    42  // when writing a new provider.
    43  func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
    44  ) (*environs.BootstrapResult, error) {
    45  	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
    46  	if err != nil {
    47  		return nil, errors.Trace(err)
    48  	}
    49  
    50  	bsResult := &environs.BootstrapResult{
    51  		Arch:     *result.Hardware.Arch,
    52  		Series:   series,
    53  		Finalize: finalizer,
    54  	}
    55  	return bsResult, nil
    56  }
    57  
    58  // BootstrapInstance creates a new instance with the series of its choice,
    59  // constrained to those of the available tools, and
    60  // returns the instance result, series, and a function that
    61  // must be called to finalize the bootstrap process by transferring
    62  // the tools and installing the initial Juju controller.
    63  // This method is called by Bootstrap above, which implements environs.Bootstrap, but
    64  // is also exported so that providers can manipulate the started instance.
    65  func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
    66  ) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) {
    67  	// TODO make safe in the case of racing Bootstraps
    68  	// If two Bootstraps are called concurrently, there's
    69  	// no way to make sure that only one succeeds.
    70  
    71  	// First thing, ensure we have tools otherwise there's no point.
    72  	if args.BootstrapSeries != "" {
    73  		selectedSeries = args.BootstrapSeries
    74  	} else {
    75  		selectedSeries = config.PreferredSeries(env.Config())
    76  	}
    77  	availableTools, err := args.AvailableTools.Match(coretools.Filter{
    78  		Series: selectedSeries,
    79  	})
    80  	if err != nil {
    81  		return nil, "", nil, err
    82  	}
    83  
    84  	// Filter image metadata to the selected series.
    85  	var imageMetadata []*imagemetadata.ImageMetadata
    86  	seriesVersion, err := series.SeriesVersion(selectedSeries)
    87  	if err != nil {
    88  		return nil, "", nil, errors.Trace(err)
    89  	}
    90  	for _, m := range args.ImageMetadata {
    91  		if m.Version != seriesVersion {
    92  			continue
    93  		}
    94  		imageMetadata = append(imageMetadata, m)
    95  	}
    96  
    97  	// Get the bootstrap SSH client. Do this early, so we know
    98  	// not to bother with any of the below if we can't finish the job.
    99  	client := ssh.DefaultClient
   100  	if client == nil {
   101  		// This should never happen: if we don't have OpenSSH, then
   102  		// go.crypto/ssh should be used with an auto-generated key.
   103  		return nil, "", nil, fmt.Errorf("no SSH client available")
   104  	}
   105  
   106  	publicKey, err := simplestreams.UserPublicSigningKey()
   107  	if err != nil {
   108  		return nil, "", nil, err
   109  	}
   110  	envCfg := env.Config()
   111  	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(
   112  		args.ControllerConfig, args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey,
   113  	)
   114  	if err != nil {
   115  		return nil, "", nil, err
   116  	}
   117  	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
   118  	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
   119  
   120  	instanceConfig.Tags = instancecfg.InstanceTags(envCfg.UUID(), args.ControllerConfig.ControllerUUID(), envCfg, instanceConfig.Jobs)
   121  	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
   122  		// If we need to override the default bridge name, do it now. When
   123  		// args.ContainerBridgeName is empty, the default names for LXC
   124  		// (lxcbr0) and KVM (virbr0) will be used.
   125  		if args.ContainerBridgeName != "" {
   126  			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
   127  			if icfg.AgentEnvironment == nil {
   128  				icfg.AgentEnvironment = make(map[string]string)
   129  			}
   130  			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
   131  		}
   132  	}
   133  	maybeSetBridge(instanceConfig)
   134  
   135  	cloudRegion := args.CloudName
   136  	if args.CloudRegion != "" {
   137  		cloudRegion += "/" + args.CloudRegion
   138  	}
   139  	fmt.Fprintf(ctx.GetStderr(), "Launching controller instance(s) on %s...\n", cloudRegion)
   140  	// Print instance status reports status changes during provisioning.
   141  	// Note the carriage returns, meaning subsequent prints are to the same
   142  	// line of stderr, not a new line.
   143  	instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error {
   144  		// The data arg is not expected to be used in this case, but
   145  		// print it, rather than ignore it, if we get something.
   146  		dataString := ""
   147  		if len(data) > 0 {
   148  			dataString = fmt.Sprintf(" %v", data)
   149  		}
   150  		fmt.Fprintf(ctx.GetStderr(), " - %s%s\r", info, dataString)
   151  		return nil
   152  	}
   153  	// Likely used after the final instanceStatus call to white-out the
   154  	// current stderr line before the next use, removing any residual status
   155  	// reporting output.
   156  	statusCleanup := func(info string) error {
   157  		// The leading spaces account for the leading characters
   158  		// emitted by instanceStatus above.
   159  		fmt.Fprintf(ctx.GetStderr(), "   %s\r", info)
   160  		return nil
   161  	}
   162  	result, err := env.StartInstance(environs.StartInstanceParams{
   163  		ControllerUUID:  args.ControllerConfig.ControllerUUID(),
   164  		Constraints:     args.BootstrapConstraints,
   165  		Tools:           availableTools,
   166  		InstanceConfig:  instanceConfig,
   167  		Placement:       args.Placement,
   168  		ImageMetadata:   imageMetadata,
   169  		StatusCallback:  instanceStatus,
   170  		CleanupCallback: statusCleanup,
   171  	})
   172  	if err != nil {
   173  		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
   174  	}
   175  	// We need some padding below to overwrite any previous messages. We'll use a width of 40.
   176  	msg := fmt.Sprintf(" - %s", result.Instance.Id())
   177  	if len(msg) < 40 {
   178  		padding := make([]string, 40-len(msg))
   179  		msg += strings.Join(padding, " ")
   180  	}
   181  	fmt.Fprintln(ctx.GetStderr(), msg)
   182  
   183  	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig, opts environs.BootstrapDialOpts) error {
   184  		icfg.Bootstrap.BootstrapMachineInstanceId = result.Instance.Id()
   185  		icfg.Bootstrap.BootstrapMachineHardwareCharacteristics = result.Hardware
   186  		envConfig := env.Config()
   187  		if result.Config != nil {
   188  			updated, err := envConfig.Apply(result.Config.UnknownAttrs())
   189  			if err != nil {
   190  				return errors.Trace(err)
   191  			}
   192  			envConfig = updated
   193  		}
   194  		if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil {
   195  			return err
   196  		}
   197  		maybeSetBridge(icfg)
   198  		return FinishBootstrap(ctx, client, env, result.Instance, icfg, opts)
   199  	}
   200  	return result, selectedSeries, finalize, nil
   201  }
   202  
   203  // FinishBootstrap completes the bootstrap process by connecting
   204  // to the instance via SSH and carrying out the cloud-config.
   205  //
   206  // Note: FinishBootstrap is exposed so it can be replaced for testing.
   207  var FinishBootstrap = func(
   208  	ctx environs.BootstrapContext,
   209  	client ssh.Client,
   210  	env environs.Environ,
   211  	inst instance.Instance,
   212  	instanceConfig *instancecfg.InstanceConfig,
   213  	opts environs.BootstrapDialOpts,
   214  ) error {
   215  	interrupted := make(chan os.Signal, 1)
   216  	ctx.InterruptNotify(interrupted)
   217  	defer ctx.StopInterruptNotify(interrupted)
   218  	addr, err := WaitSSH(
   219  		ctx.GetStderr(),
   220  		interrupted,
   221  		client,
   222  		GetCheckNonceCommand(instanceConfig),
   223  		&RefreshableInstance{inst, env},
   224  		opts,
   225  	)
   226  	if err != nil {
   227  		return err
   228  	}
   229  	return ConfigureMachine(ctx, client, addr, instanceConfig)
   230  }
   231  
   232  func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string {
   233  	// Each attempt to connect to an address must verify the machine is the
   234  	// bootstrap machine by checking its nonce file exists and contains the
   235  	// nonce in the InstanceConfig. This also blocks sshinit from proceeding
   236  	// until cloud-init has completed, which is necessary to ensure apt
   237  	// invocations don't trample each other.
   238  	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
   239  	checkNonceCommand := fmt.Sprintf(`
   240  	noncefile=%s
   241  	if [ ! -e "$noncefile" ]; then
   242  		echo "$noncefile does not exist" >&2
   243  		exit 1
   244  	fi
   245  	content=$(cat $noncefile)
   246  	if [ "$content" != %s ]; then
   247  		echo "$noncefile contents do not match machine nonce" >&2
   248  		exit 1
   249  	fi
   250  	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
   251  	return checkNonceCommand
   252  }
   253  
   254  func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
   255  	// Bootstrap is synchronous, and will spawn a subprocess
   256  	// to complete the procedure. If the user hits Ctrl-C,
   257  	// SIGINT is sent to the foreground process attached to
   258  	// the terminal, which will be the ssh subprocess at this
   259  	// point. For that reason, we do not call StopInterruptNotify
   260  	// until this function completes.
   261  	cloudcfg, err := cloudinit.New(instanceConfig.Series)
   262  	if err != nil {
   263  		return errors.Trace(err)
   264  	}
   265  
   266  	// Set packaging update here
   267  	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
   268  	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)
   269  
   270  	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
   271  	if err != nil {
   272  		return err
   273  	}
   274  	if err := udata.ConfigureJuju(); err != nil {
   275  		return err
   276  	}
   277  	configScript, err := cloudcfg.RenderScript()
   278  	if err != nil {
   279  		return err
   280  	}
   281  	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
   282  	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
   283  		Host:           "ubuntu@" + host,
   284  		Client:         client,
   285  		Config:         cloudcfg,
   286  		ProgressWriter: ctx.GetStderr(),
   287  		Series:         instanceConfig.Series,
   288  	})
   289  }
   290  
// InstanceRefresher is the subset of the Instance interface required
// for waiting for SSH access to become available.
type InstanceRefresher interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)

	// Status returns the provider-specific status for the
	// instance.
	Status() instance.InstanceStatus
}
   306  
// RefreshableInstance pairs an instance.Instance with the
// environs.Environ used to look it up again, so that the embedded
// instance can be replaced with a freshly fetched one by Refresh.
type RefreshableInstance struct {
	instance.Instance
	// Env is the environment queried by Refresh.
	Env environs.Environ
}
   311  
   312  // Refresh refreshes the addresses for the instance.
   313  func (i *RefreshableInstance) Refresh() error {
   314  	instances, err := i.Env.Instances([]instance.Id{i.Id()})
   315  	if err != nil {
   316  		return errors.Trace(err)
   317  	}
   318  	i.Instance = instances[0]
   319  	return nil
   320  }
   321  
// hostChecker repeatedly attempts to run a check script over SSH on a
// single address; see hostChecker.loop.
type hostChecker struct {
	// addr is the address that is being checked.
	addr network.Address
	// client is the SSH client used for each connection attempt.
	client ssh.Client
	// wg is marked Done when hostChecker.loop returns.
	wg *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}
   340  
// Close implements io.Closer, as required by parallel.Try.
// It does nothing and always returns nil.
func (*hostChecker) Close() error {
	return nil
}
   345  
   346  func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
   347  	defer hc.wg.Done()
   348  	// The value of connectSSH is taken outside the goroutine that may outlive
   349  	// hostChecker.loop, or we evoke the wrath of the race detector.
   350  	connectSSH := connectSSH
   351  	done := make(chan error, 1)
   352  	var lastErr error
   353  	for {
   354  		address := hc.addr.Value
   355  		go func() {
   356  			done <- connectSSH(hc.client, address, hc.checkHostScript)
   357  		}()
   358  		select {
   359  		case <-dying:
   360  			return hc, lastErr
   361  		case lastErr = <-done:
   362  			if lastErr == nil {
   363  				return hc, nil
   364  			}
   365  			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
   366  		}
   367  		select {
   368  		case <-hc.closed:
   369  			return hc, lastErr
   370  		case <-dying:
   371  		case <-time.After(hc.checkDelay):
   372  		}
   373  	}
   374  }
   375  
// parallelHostChecker runs one hostChecker per address through an
// embedded parallel.Try.
type parallelHostChecker struct {
	*parallel.Try
	// client is the SSH client handed to each hostChecker.
	client ssh.Client
	// stderr receives a message for each address that is newly tried.
	stderr io.Writer
	// wg counts the hostChecker loops that have been started; each
	// loop marks it Done on return.
	wg sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}
   395  
   396  func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
   397  	for _, addr := range addrs {
   398  		if _, ok := p.active[addr]; ok {
   399  			continue
   400  		}
   401  		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
   402  		closed := make(chan struct{})
   403  		hc := &hostChecker{
   404  			addr:            addr,
   405  			client:          p.client,
   406  			checkDelay:      p.checkDelay,
   407  			checkHostScript: p.checkHostScript,
   408  			closed:          closed,
   409  			wg:              &p.wg,
   410  		}
   411  		p.wg.Add(1)
   412  		p.active[addr] = closed
   413  		p.Start(hc.loop)
   414  	}
   415  }
   416  
   417  // Close prevents additional functions from being added to
   418  // the Try, and tells each active hostChecker to exit.
   419  func (p *parallelHostChecker) Close() error {
   420  	// We signal each checker to stop and wait for them
   421  	// each to complete; this allows us to get the error,
   422  	// as opposed to when using try.Kill which does not
   423  	// wait for the functions to complete.
   424  	p.Try.Close()
   425  	for _, ch := range p.active {
   426  		close(ch)
   427  	}
   428  	return nil
   429  }
   430  
   431  // connectSSH is called to connect to the specified host and
   432  // execute the "checkHostScript" bash script on it.
   433  var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
   434  	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
   435  	cmd.Stdin = strings.NewReader(checkHostScript)
   436  	output, err := cmd.CombinedOutput()
   437  	if err != nil && len(output) > 0 {
   438  		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
   439  	}
   440  	return err
   441  }
   442  
   443  // WaitSSH waits for the instance to be assigned a routable
   444  // address, then waits until we can connect to it via SSH.
   445  //
   446  // waitSSH attempts on all addresses returned by the instance
   447  // in parallel; the first succeeding one wins. We ensure that
   448  // private addresses are for the correct machine by checking
   449  // the presence of a file on the machine that contains the
   450  // machine's nonce. The "checkHostScript" is a bash script
   451  // that performs this file check.
   452  func WaitSSH(
   453  	stdErr io.Writer,
   454  	interrupted <-chan os.Signal,
   455  	client ssh.Client,
   456  	checkHostScript string,
   457  	inst InstanceRefresher,
   458  	opts environs.BootstrapDialOpts,
   459  ) (addr string, err error) {
   460  	globalTimeout := time.After(opts.Timeout)
   461  	pollAddresses := time.NewTimer(0)
   462  
   463  	// checker checks each address in a loop, in parallel,
   464  	// until one succeeds, the global timeout is reached,
   465  	// or the tomb is killed.
   466  	checker := parallelHostChecker{
   467  		Try:             parallel.NewTry(0, nil),
   468  		client:          client,
   469  		stderr:          stdErr,
   470  		active:          make(map[network.Address]chan struct{}),
   471  		checkDelay:      opts.RetryDelay,
   472  		checkHostScript: checkHostScript,
   473  	}
   474  	defer checker.wg.Wait()
   475  	defer checker.Kill()
   476  
   477  	fmt.Fprintln(stdErr, "Waiting for address")
   478  	for {
   479  		select {
   480  		case <-pollAddresses.C:
   481  			pollAddresses.Reset(opts.AddressesDelay)
   482  			if err := inst.Refresh(); err != nil {
   483  				return "", fmt.Errorf("refreshing addresses: %v", err)
   484  			}
   485  			instanceStatus := inst.Status()
   486  			if instanceStatus.Status == status.ProvisioningError {
   487  				if instanceStatus.Message != "" {
   488  					return "", errors.Errorf("instance provisioning failed (%v)", instanceStatus.Message)
   489  				}
   490  				return "", errors.Errorf("instance provisioning failed")
   491  			}
   492  			addresses, err := inst.Addresses()
   493  			if err != nil {
   494  				return "", fmt.Errorf("getting addresses: %v", err)
   495  			}
   496  			checker.UpdateAddresses(addresses)
   497  		case <-globalTimeout:
   498  			checker.Close()
   499  			lastErr := checker.Wait()
   500  			format := "waited for %v "
   501  			args := []interface{}{opts.Timeout}
   502  			if len(checker.active) == 0 {
   503  				format += "without getting any addresses"
   504  			} else {
   505  				format += "without being able to connect"
   506  			}
   507  			if lastErr != nil && lastErr != parallel.ErrStopped {
   508  				format += ": %v"
   509  				args = append(args, lastErr)
   510  			}
   511  			return "", fmt.Errorf(format, args...)
   512  		case <-interrupted:
   513  			return "", fmt.Errorf("interrupted")
   514  		case <-checker.Dead():
   515  			result, err := checker.Result()
   516  			if err != nil {
   517  				return "", err
   518  			}
   519  			return result.(*hostChecker).addr.Value, nil
   520  		}
   521  	}
   522  }