github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/series"
	"github.com/juju/utils/shell"
	"github.com/juju/utils/ssh"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/environs/imagemetadata"
	"github.com/juju/juju/environs/simplestreams"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	"github.com/juju/juju/status"
	coretools "github.com/juju/juju/tools"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (*environs.BootstrapResult, error) {
	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
	if err != nil {
		return nil, errors.Trace(err)
	}

	bsResult := &environs.BootstrapResult{
		Arch:     *result.Hardware.Arch,
		Series:   series,
		Finalize: finalizer,
	}
	return bsResult, nil
}
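
// An illustrative sketch (not part of the original file) of how a
// provider might delegate to Bootstrap from its environs.Environ
// implementation; the myEnviron type is hypothetical:
//
//	func (env *myEnviron) Bootstrap(
//		ctx environs.BootstrapContext, args environs.BootstrapParams,
//	) (*environs.BootstrapResult, error) {
//		return common.Bootstrap(ctx, env, args)
//	}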

// BootstrapInstance creates a new instance with the series of its choice,
// constrained to those for which tools are available, and returns the
// instance result, the selected series, and a function that must be called
// to finalize the bootstrap process by transferring the tools and
// installing the initial Juju controller. This function is called by
// Bootstrap above, which implements environs.Bootstrap, but is also
// exported so that providers can manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, selectedSeries string, _ environs.BootstrapFinalizer, err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	// First, ensure we have tools; otherwise there's no point continuing.
	if args.BootstrapSeries != "" {
		selectedSeries = args.BootstrapSeries
	} else {
		selectedSeries = config.PreferredSeries(env.Config())
	}
	availableTools, err := args.AvailableTools.Match(coretools.Filter{
		Series: selectedSeries,
	})
	if err != nil {
		return nil, "", nil, err
	}

	// Filter image metadata to the selected series.
	var imageMetadata []*imagemetadata.ImageMetadata
	seriesVersion, err := series.SeriesVersion(selectedSeries)
	if err != nil {
		return nil, "", nil, errors.Trace(err)
	}
	for _, m := range args.ImageMetadata {
		if m.Version != seriesVersion {
			continue
		}
		imageMetadata = append(imageMetadata, m)
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	publicKey, err := simplestreams.UserPublicSigningKey()
	if err != nil {
		return nil, "", nil, err
	}
	envCfg := env.Config()
	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(
		args.ControllerConfig, args.BootstrapConstraints, args.ModelConstraints, selectedSeries, publicKey,
	)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()

	instanceConfig.Tags = instancecfg.InstanceTags(envCfg.UUID(), args.ControllerConfig.ControllerUUID(), envCfg, instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	bootstrapMsg := env.BootstrapMessage()
	if bootstrapMsg != "" {
		ctx.Infof(bootstrapMsg)
	}

	cloudRegion := args.CloudName
	if args.CloudRegion != "" {
		cloudRegion += "/" + args.CloudRegion
	}
	fmt.Fprintf(ctx.GetStderr(), "Launching controller instance(s) on %s...\n", cloudRegion)
	// instanceStatus reports status changes during provisioning. Note the
	// trailing carriage return: subsequent prints overwrite the same line
	// of stderr rather than starting a new one.
	instanceStatus := func(settableStatus status.Status, info string, data map[string]interface{}) error {
		// The data arg is not expected to be used in this case, but
		// print it, rather than ignore it, if we get something.
		dataString := ""
		if len(data) > 0 {
			dataString = fmt.Sprintf(" %v", data)
		}
		fmt.Fprintf(ctx.GetStderr(), " - %s%s\r", info, dataString)
		return nil
	}
	// statusCleanup whites out the current stderr line after the final
	// instanceStatus call, removing any residual status-reporting output
	// before the next use of the line.
	statusCleanup := func(info string) error {
		// The leading spaces account for the leading characters
		// emitted by instanceStatus above.
		fmt.Fprintf(ctx.GetStderr(), "   %s\r", info)
		return nil
	}
	result, err := env.StartInstance(environs.StartInstanceParams{
		ControllerUUID:  args.ControllerConfig.ControllerUUID(),
		Constraints:     args.BootstrapConstraints,
		Tools:           availableTools,
		InstanceConfig:  instanceConfig,
		Placement:       args.Placement,
		ImageMetadata:   imageMetadata,
		StatusCallback:  instanceStatus,
		CleanupCallback: statusCleanup,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}

	msg := fmt.Sprintf(" - %s (%s)", result.Instance.Id(), formatHardware(result.Hardware))
	// Pad the message so it overwrites any residual status output
	// remaining on the current stderr line.
	if len(msg) < 40 {
		msg += strings.Repeat(" ", 40-len(msg))
	}
	fmt.Fprintln(ctx.GetStderr(), msg)

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig, opts environs.BootstrapDialOpts) error {
		icfg.Bootstrap.BootstrapMachineInstanceId = result.Instance.Id()
		icfg.Bootstrap.BootstrapMachineHardwareCharacteristics = result.Hardware
		envConfig := env.Config()
		if result.Config != nil {
			updated, err := envConfig.Apply(result.Config.UnknownAttrs())
			if err != nil {
				return errors.Trace(err)
			}
			envConfig = updated
		}
		if err := instancecfg.FinishInstanceConfig(icfg, envConfig); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, env, result.Instance, icfg, opts)
	}
	return result, selectedSeries, finalize, nil
}

func formatHardware(hw *instance.HardwareCharacteristics) string {
	if hw == nil {
		return ""
	}
	out := make([]string, 0, 3)
	if hw.Arch != nil && *hw.Arch != "" {
		out = append(out, fmt.Sprintf("arch=%s", *hw.Arch))
	}
	if hw.Mem != nil && *hw.Mem > 0 {
		out = append(out, fmt.Sprintf("mem=%s", formatMemory(*hw.Mem)))
	}
	if hw.CpuCores != nil && *hw.CpuCores > 0 {
		out = append(out, fmt.Sprintf("cores=%d", *hw.CpuCores))
	}
	return strings.Join(out, " ")
}
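
// For example, characteristics of arch=amd64, mem=4096M and two cores
// format as "arch=amd64 mem=4G cores=2"; nil or zero-valued fields are
// simply omitted from the output.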

func formatMemory(m uint64) string {
	if m < 1024 {
		return fmt.Sprintf("%dM", m)
	}
	s := fmt.Sprintf("%.1f", float32(m)/1024.0)
	return strings.TrimSuffix(s, ".0") + "G"
}
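
// Sample values, derived from the arithmetic above:
//
//	formatMemory(512)  == "512M"
//	formatMemory(1536) == "1.5G"
//	formatMemory(2048) == "2G"   // trailing ".0" is trimmed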

// FinishBootstrap completes the bootstrap process by connecting to the
// instance via SSH and applying the cloud-init configuration.
//
// Note: FinishBootstrap is exposed as a variable so it can be replaced
// for testing.
var FinishBootstrap = func(
	ctx environs.BootstrapContext,
	client ssh.Client,
	env environs.Environ,
	inst instance.Instance,
	instanceConfig *instancecfg.InstanceConfig,
	opts environs.BootstrapDialOpts,
) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	addr, err := WaitSSH(
		ctx.GetStderr(),
		interrupted,
		client,
		GetCheckNonceCommand(instanceConfig),
		&RefreshableInstance{inst, env},
		opts,
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}
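
// Since FinishBootstrap is a package variable, tests can swap in a
// stub; a minimal sketch that skips SSH and machine configuration:
//
//	FinishBootstrap = func(
//		ctx environs.BootstrapContext, client ssh.Client,
//		env environs.Environ, inst instance.Instance,
//		instanceConfig *instancecfg.InstanceConfig,
//		opts environs.BootstrapDialOpts,
//	) error {
//		return nil
//	}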

// GetCheckNonceCommand returns a script that verifies a machine's nonce
// file against the nonce expected in the given InstanceConfig.
func GetCheckNonceCommand(instanceConfig *instancecfg.InstanceConfig) string {
	// Each attempt to connect to an address must verify the machine is the
	// bootstrap machine by checking that its nonce file exists and contains
	// the nonce in the InstanceConfig. This also blocks sshinit from
	// proceeding until cloud-init has completed, which is necessary to
	// ensure apt invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat "$noncefile")
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
	return checkNonceCommand
}
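
// With a hypothetical DataDir of /var/lib/juju and machine nonce
// "example:nonce", the two ShQuote calls above interpolate roughly as
// (the placeholder stands in for cloudconfig.NonceFile):
//
//	noncefile='/var/lib/juju/<NonceFile>'
//	...
//	if [ "$content" != 'example:nonce' ]; then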

// ConfigureMachine connects to the given host over SSH and runs the
// rendered cloud-init configuration script on it.
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Propagate the OS refresh/upgrade settings into the cloud config.
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}
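
// The script sent to the host is therefore a prologue produced by
// shell.DumpFileOnErrorScript, which dumps the cloud-init output log
// should configuration fail, followed by the rendered cloud-init
// configuration script itself.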

// InstanceRefresher is the subset of the Instance interface required
// for waiting for SSH access to become available.
type InstanceRefresher interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)

	// Status returns the provider-specific status for the
	// instance.
	Status() instance.InstanceStatus
}

// RefreshableInstance pairs an instance.Instance with its Environ so
// that the instance's addresses can be refreshed on demand.
type RefreshableInstance struct {
	instance.Instance
	Env environs.Environ
}

// Refresh refreshes the addresses for the instance.
func (i *RefreshableInstance) Refresh() error {
	instances, err := i.Env.Instances([]instance.Id{i.Id()})
	if err != nil {
		return errors.Trace(err)
	}
	i.Instance = instances[0]
	return nil
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}
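
// loop, below, repeatedly runs checkHostScript against the address over
// SSH, sleeping checkDelay between failed attempts. It returns when an
// attempt succeeds, when the Try is dying, or when the checker is
// closed; the hostChecker itself serves as the io.Closer result that
// parallel.Try expects.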

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is taken outside the goroutine that may outlive
	// hostChecker.loop, or we evoke the wrath of the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		select {
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}

type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

// UpdateAddresses starts a hostChecker for any address in addrs that is
// not already being tested.
func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}
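
// Because connectSSH is a package variable, tests can stub it out; a
// minimal sketch that treats every host as immediately reachable:
//
//	connectSSH = func(client ssh.Client, host, checkHostScript string) error {
//		return nil
//	}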

// WaitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// WaitSSH tries all addresses returned by the instance in
// parallel; the first to succeed wins. We ensure that private
// addresses are for the correct machine by checking for a file
// on the machine that contains the machine's nonce. The
// "checkHostScript" is a bash script that performs this check.
func WaitSSH(
	stdErr io.Writer,
	interrupted <-chan os.Signal,
	client ssh.Client,
	checkHostScript string,
	inst InstanceRefresher,
	opts environs.BootstrapDialOpts,
) (addr string, err error) {
	globalTimeout := time.After(opts.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the tomb is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          stdErr,
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      opts.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(stdErr, "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(opts.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			instanceStatus := inst.Status()
			if instanceStatus.Status == status.ProvisioningError {
				if instanceStatus.Message != "" {
					return "", errors.Errorf("instance provisioning failed (%v)", instanceStatus.Message)
				}
				return "", errors.Errorf("instance provisioning failed")
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{opts.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
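
// An illustrative call to WaitSSH, tying the pieces above together
// (the dial-option durations are hypothetical, not defaults taken
// from this file):
//
//	addr, err := WaitSSH(
//		ctx.GetStderr(), interrupted, ssh.DefaultClient,
//		GetCheckNonceCommand(icfg),
//		&RefreshableInstance{inst, env},
//		environs.BootstrapDialOpts{
//			Timeout:        10 * time.Minute,
//			RetryDelay:     5 * time.Second,
//			AddressesDelay: 3 * time.Second,
//		},
//	)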