github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/shell"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/utils/ssh"
)

var logger = loggo.GetLogger("juju.provider.common")

// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (arch, series string, _ environs.BootstrapFinalizer, err error) {
	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
	if err != nil {
		return "", "", nil, err
	}
	return *result.Hardware.Arch, series, finalizer, nil
}
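
// Illustrative only: a provider can satisfy environs.Environ's Bootstrap
// method by delegating to this helper. The receiver type "environ" and the
// import alias "common" below are assumptions, not code from this file:
//
//	func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (string, string, environs.BootstrapFinalizer, error) {
//		return common.Bootstrap(ctx, e, args)
//	}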

// BootstrapInstance creates a new instance with the series and architecture
// of its choice, constrained to those of the available tools, and
// returns the instance result, series, and a function that
// must be called to finalize the bootstrap process by transferring
// the tools and installing the initial Juju state server.
// This function is called by Bootstrap above, which implements
// environs.Bootstrap, but it is also exported so that providers can
// manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	// First, ensure we have tools; otherwise there's no point.
	series = config.PreferredSeries(env.Config())
	availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series})
	if err != nil {
		return nil, "", nil, err
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(args.Constraints, series)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	result, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:    args.Constraints,
		Tools:          availableTools,
		InstanceConfig: instanceConfig,
		Placement:      args.Placement,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
		icfg.InstanceId = result.Instance.Id()
		icfg.HardwareCharacteristics = result.Hardware
		if err := instancecfg.FinishInstanceConfig(icfg, env.Config()); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, env, result.Instance, icfg)
	}
	return result, series, finalize, nil
}
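
// Illustrative only: because BootstrapInstance is exported, a provider can
// start the instance, adjust it through its own API, and only then run the
// finalizer. A minimal sketch, not code from this repository:
//
//	result, series, finalize, err := common.BootstrapInstance(ctx, env, args)
//	if err != nil {
//		return "", "", nil, err
//	}
//	// ... provider-specific manipulation of result.Instance goes here ...
//	return *result.Hardware.Arch, series, finalize, nil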

// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and applying the cloud-init configuration.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(
	ctx environs.BootstrapContext,
	client ssh.Client,
	env environs.Environ,
	inst instance.Instance,
	instanceConfig *instancecfg.InstanceConfig,
) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	// Each attempt to connect to an address must verify the machine is the
	// bootstrap machine by checking its nonce file exists and contains the
	// nonce in the InstanceConfig. This also blocks sshinit from proceeding
	// until cloud-init has completed, which is necessary to ensure apt
	// invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat "$noncefile")
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
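	// For illustration, with DataDir "/var/lib/juju", cloudconfig.NonceFile
	// "nonce.txt", and a machine nonce of "user-admin:bootstrap" (all three
	// values hypothetical here), the rendered check amounts to:
	//
	//	noncefile='/var/lib/juju/nonce.txt'
	//	content=$(cat "$noncefile")
	//	[ "$content" = 'user-admin:bootstrap' ] || exit 1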
	addr, err := waitSSH(
		ctx,
		interrupted,
		client,
		checkNonceCommand,
		&refreshableInstance{inst, env},
		instanceConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}
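
// A minimal testing sketch: because FinishBootstrap is a package variable, a
// test can swap in a stub and restore the original afterwards. This usage is
// an assumption about how the hook might be exercised, not code from this
// repository:
//
//	saved := common.FinishBootstrap
//	common.FinishBootstrap = func(
//		ctx environs.BootstrapContext, client ssh.Client, env environs.Environ,
//		inst instance.Instance, icfg *instancecfg.InstanceConfig,
//	) error {
//		return nil // skip SSH provisioning entirely in the test
//	}
//	defer func() { common.FinishBootstrap = saved }()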

// ConfigureMachine connects to the given host as the "ubuntu" user and runs
// the rendered cloud-init configuration script there over SSH, completing
// the machine's provisioning.
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Set packaging update here.
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}
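
// Illustrative only: a caller that already has a reachable host (for example
// a manually provisioned machine) could drive this step directly; the
// address and instance config here are hypothetical:
//
//	if err := common.ConfigureMachine(ctx, ssh.DefaultClient, "10.0.0.1", icfg); err != nil {
//		return errors.Annotate(err, "configuring machine")
//	}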

type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}

type refreshableInstance struct {
	instance.Instance
	env environs.Environ
}

// Refresh refreshes the addresses for the instance.
func (i *refreshableInstance) Refresh() error {
	instances, err := i.env.Instances([]instance.Id{i.Id()})
	if err != nil {
		return errors.Trace(err)
	}
	i.Instance = instances[0]
	return nil
}

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// The value of connectSSH is captured outside the goroutine that may
	// outlive hostChecker.loop, or we invoke the wrath of the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}

type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}
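
// A minimal testing sketch: connectSSH is a variable precisely so that tests
// in this package can substitute it, e.g. to simulate a host that refuses
// its first few connection attempts. This is an assumed pattern, not code
// from this repository:
//
//	attempts := 0
//	connectSSH = func(client ssh.Client, host, checkHostScript string) error {
//		attempts++
//		if attempts < 3 {
//			return fmt.Errorf("connection refused")
//		}
//		return nil
//	}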

// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH attempts to connect to all addresses returned by the
// instance, in parallel; the first successful connection wins.
// We ensure that private addresses are for the correct machine
// by checking for the presence of a file on the machine that
// contains the machine's nonce. The "checkHostScript" is a bash
// script that performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the Try is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
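
// Illustrative only: the timeout values consumed above come from
// config.Config.BootstrapSSHOpts(); a hand-built equivalent (field values
// hypothetical) might look like:
//
//	opts := config.SSHTimeoutOpts{
//		Timeout:        10 * time.Minute,
//		RetryDelay:     5 * time.Second,
//		AddressesDelay: 10 * time.Second,
//	}
//	addr, err := waitSSH(ctx, interrupted, client, checkHostScript, inst, opts)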