github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/provider/common/bootstrap.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package common

import (
	"fmt"
	"io"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/utils"
	"github.com/juju/utils/parallel"
	"github.com/juju/utils/shell"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/cloudconfig"
	"github.com/juju/juju/cloudconfig/cloudinit"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/cloudconfig/sshinit"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/utils/ssh"
)

var logger = loggo.GetLogger("juju.provider.common")
// Bootstrap is a common implementation of the Bootstrap method defined on
// environs.Environ; we strongly recommend that this implementation be used
// when writing a new provider.
func Bootstrap(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (arch, series string, _ environs.BootstrapFinalizer, err error) {
	result, series, finalizer, err := BootstrapInstance(ctx, env, args)
	if err != nil {
		return "", "", nil, err
	}
	return *result.Hardware.Arch, series, finalizer, nil
}
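
// The doc comment above recommends that providers delegate to this
// implementation. A rough, hypothetical sketch (the provider type named
// environ is illustrative, not taken from any particular provider):
//
//	func (e *environ) Bootstrap(ctx environs.BootstrapContext, args environs.BootstrapParams) (string, string, environs.BootstrapFinalizer, error) {
//		return common.Bootstrap(ctx, e, args)
//	}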

// BootstrapInstance creates a new instance with the series and architecture
// of its choice, constrained to those of the available tools, and
// returns the instance result, series, and a function that
// must be called to finalize the bootstrap process by transferring
// the tools and installing the initial Juju state server.
// This method is called by Bootstrap above, which provides the common
// implementation of environs.Environ's Bootstrap method, but it is also
// exported so that providers can manipulate the started instance.
func BootstrapInstance(ctx environs.BootstrapContext, env environs.Environ, args environs.BootstrapParams,
) (_ *environs.StartInstanceResult, series string, _ environs.BootstrapFinalizer, err error) {
	// TODO make safe in the case of racing Bootstraps
	// If two Bootstraps are called concurrently, there's
	// no way to make sure that only one succeeds.

	// First, ensure we have tools; otherwise there's no point.
	series = config.PreferredSeries(env.Config())
	availableTools, err := args.AvailableTools.Match(coretools.Filter{Series: series})
	if err != nil {
		return nil, "", nil, err
	}

	// Get the bootstrap SSH client. Do this early, so we know
	// not to bother with any of the below if we can't finish the job.
	client := ssh.DefaultClient
	if client == nil {
		// This should never happen: if we don't have OpenSSH, then
		// go.crypto/ssh should be used with an auto-generated key.
		return nil, "", nil, fmt.Errorf("no SSH client available")
	}

	instanceConfig, err := instancecfg.NewBootstrapInstanceConfig(args.Constraints, series)
	if err != nil {
		return nil, "", nil, err
	}
	instanceConfig.EnableOSRefreshUpdate = env.Config().EnableOSRefreshUpdate()
	instanceConfig.EnableOSUpgrade = env.Config().EnableOSUpgrade()
	instanceConfig.Tags = instancecfg.InstanceTags(env.Config(), instanceConfig.Jobs)
	maybeSetBridge := func(icfg *instancecfg.InstanceConfig) {
		// If we need to override the default bridge name, do it now. When
		// args.ContainerBridgeName is empty, the default names for LXC
		// (lxcbr0) and KVM (virbr0) will be used.
		if args.ContainerBridgeName != "" {
			logger.Debugf("using %q as network bridge for all container types", args.ContainerBridgeName)
			if icfg.AgentEnvironment == nil {
				icfg.AgentEnvironment = make(map[string]string)
			}
			icfg.AgentEnvironment[agent.LxcBridge] = args.ContainerBridgeName
		}
	}
	maybeSetBridge(instanceConfig)

	fmt.Fprintln(ctx.GetStderr(), "Launching instance")
	result, err := env.StartInstance(environs.StartInstanceParams{
		Constraints:    args.Constraints,
		Tools:          availableTools,
		InstanceConfig: instanceConfig,
		Placement:      args.Placement,
	})
	if err != nil {
		return nil, "", nil, errors.Annotate(err, "cannot start bootstrap instance")
	}
	fmt.Fprintf(ctx.GetStderr(), " - %s\n", result.Instance.Id())

	finalize := func(ctx environs.BootstrapContext, icfg *instancecfg.InstanceConfig) error {
		icfg.InstanceId = result.Instance.Id()
		icfg.HardwareCharacteristics = result.Hardware
		if err := instancecfg.FinishInstanceConfig(icfg, env.Config()); err != nil {
			return err
		}
		maybeSetBridge(icfg)
		return FinishBootstrap(ctx, client, result.Instance, icfg)
	}
	return result, series, finalize, nil
}
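
// BootstrapInstance is exported so that a provider can start the instance,
// adjust it, and only then run the finalizer. A hypothetical sketch of such
// a caller (the "adjust" step is illustrative, not part of this package):
//
//	result, series, finalize, err := common.BootstrapInstance(ctx, env, args)
//	if err != nil {
//		return "", "", nil, err
//	}
//	// e.g. attach provider-specific metadata to result.Instance here.
//	return *result.Hardware.Arch, series, finalize, nil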

// FinishBootstrap completes the bootstrap process by connecting
// to the instance via SSH and running the cloud-config over that connection.
//
// Note: FinishBootstrap is exposed so it can be replaced for testing.
var FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, instanceConfig *instancecfg.InstanceConfig) error {
	interrupted := make(chan os.Signal, 1)
	ctx.InterruptNotify(interrupted)
	defer ctx.StopInterruptNotify(interrupted)
	// Each attempt to connect to an address must verify that the machine is
	// the bootstrap machine by checking that its nonce file exists and
	// contains the nonce in the InstanceConfig. This also blocks sshinit from
	// proceeding until cloud-init has completed, which is necessary to ensure
	// apt invocations don't trample each other.
	nonceFile := utils.ShQuote(path.Join(instanceConfig.DataDir, cloudconfig.NonceFile))
	checkNonceCommand := fmt.Sprintf(`
	noncefile=%s
	if [ ! -e "$noncefile" ]; then
		echo "$noncefile does not exist" >&2
		exit 1
	fi
	content=$(cat "$noncefile")
	if [ "$content" != %s ]; then
		echo "$noncefile contents do not match machine nonce" >&2
		exit 1
	fi
	`, nonceFile, utils.ShQuote(instanceConfig.MachineNonce))
	addr, err := waitSSH(
		ctx,
		interrupted,
		client,
		checkNonceCommand,
		inst,
		instanceConfig.Config.BootstrapSSHOpts(),
	)
	if err != nil {
		return err
	}
	return ConfigureMachine(ctx, client, addr, instanceConfig)
}
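
// Because FinishBootstrap is a package-level variable, it can be swapped out
// for testing, as the note above says. A minimal, hypothetical sketch of what
// a test might do (the save/restore pattern is illustrative only):
//
//	original := common.FinishBootstrap
//	common.FinishBootstrap = func(ctx environs.BootstrapContext, client ssh.Client, inst instance.Instance, icfg *instancecfg.InstanceConfig) error {
//		return nil // skip the SSH round-trip in tests
//	}
//	defer func() { common.FinishBootstrap = original }()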

// ConfigureMachine renders the cloud-config for the given instance config
// and runs the resulting script on the host over SSH.
func ConfigureMachine(ctx environs.BootstrapContext, client ssh.Client, host string, instanceConfig *instancecfg.InstanceConfig) error {
	// Bootstrap is synchronous, and will spawn a subprocess
	// to complete the procedure. If the user hits Ctrl-C,
	// SIGINT is sent to the foreground process attached to
	// the terminal, which will be the ssh subprocess at this
	// point. For that reason, we do not call StopInterruptNotify
	// until this function completes.
	cloudcfg, err := cloudinit.New(instanceConfig.Series)
	if err != nil {
		return errors.Trace(err)
	}

	// Propagate the OS refresh/upgrade settings to the rendered cloud-config.
	cloudcfg.SetSystemUpdate(instanceConfig.EnableOSRefreshUpdate)
	cloudcfg.SetSystemUpgrade(instanceConfig.EnableOSUpgrade)

	udata, err := cloudconfig.NewUserdataConfig(instanceConfig, cloudcfg)
	if err != nil {
		return err
	}
	if err := udata.ConfigureJuju(); err != nil {
		return err
	}
	configScript, err := cloudcfg.RenderScript()
	if err != nil {
		return err
	}
	script := shell.DumpFileOnErrorScript(instanceConfig.CloudInitOutputLog) + configScript
	return sshinit.RunConfigureScript(script, sshinit.ConfigureParams{
		Host:           "ubuntu@" + host,
		Client:         client,
		Config:         cloudcfg,
		ProgressWriter: ctx.GetStderr(),
		Series:         instanceConfig.Series,
	})
}
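
// ConfigureMachine is exported separately from FinishBootstrap, so a caller
// that already knows a reachable address could run just the configuration
// step. A hypothetical sketch (the host value is illustrative):
//
//	if err := common.ConfigureMachine(ctx, ssh.DefaultClient, "10.0.0.42", icfg); err != nil {
//		return errors.Trace(err)
//	}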

type addresser interface {
	// Refresh refreshes the addresses for the instance.
	Refresh() error

	// Addresses returns the addresses for the instance.
	// To ensure that the results are up to date, call
	// Refresh first.
	Addresses() ([]network.Address, error)
}
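
// Anything that can report (and refresh) its addresses satisfies addresser;
// waitSSH below relies only on this small interface. A minimal, hypothetical
// stub, e.g. for in-package tests that want a fixed address:
//
//	type fixedAddresser struct{ addrs []network.Address }
//
//	func (f fixedAddresser) Refresh() error                        { return nil }
//	func (f fixedAddresser) Addresses() ([]network.Address, error) { return f.addrs, nil }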

type hostChecker struct {
	addr   network.Address
	client ssh.Client
	wg     *sync.WaitGroup

	// checkDelay is the amount of time to wait between retries.
	checkDelay time.Duration

	// checkHostScript is executed on the host via SSH.
	// hostChecker.loop will return once the script
	// runs without error.
	checkHostScript string

	// closed is closed to indicate that the host checker should
	// return, without waiting for the result of any ongoing
	// attempts.
	closed <-chan struct{}
}

// Close implements io.Closer, as required by parallel.Try.
func (*hostChecker) Close() error {
	return nil
}

func (hc *hostChecker) loop(dying <-chan struct{}) (io.Closer, error) {
	defer hc.wg.Done()
	// Take a copy of connectSSH before starting the goroutine, which may
	// outlive hostChecker.loop; reading the package variable from inside
	// the goroutine would upset the race detector.
	connectSSH := connectSSH
	done := make(chan error, 1)
	var lastErr error
	for {
		address := hc.addr.Value
		go func() {
			done <- connectSSH(hc.client, address, hc.checkHostScript)
		}()
		select {
		case <-hc.closed:
			return hc, lastErr
		case <-dying:
			return hc, lastErr
		case lastErr = <-done:
			if lastErr == nil {
				return hc, nil
			}
			logger.Debugf("connection attempt for %s failed: %v", address, lastErr)
		}
		// Wait checkDelay before retrying, unless we are asked to stop first.
		select {
		case <-hc.closed:
		case <-dying:
		case <-time.After(hc.checkDelay):
		}
	}
}

type parallelHostChecker struct {
	*parallel.Try
	client ssh.Client
	stderr io.Writer
	wg     sync.WaitGroup

	// active is a map of addresses to channels for addresses actively
	// being tested. The goroutine testing the address will continue
	// to attempt connecting to the address until it succeeds, the Try
	// is killed, or the corresponding channel in this map is closed.
	active map[network.Address]chan struct{}

	// checkDelay is how long each hostChecker waits between attempts.
	checkDelay time.Duration

	// checkHostScript is the script to run on each host to check that
	// it is the host we expect.
	checkHostScript string
}

// UpdateAddresses starts a hostChecker for any address in addrs that is
// not already being checked.
func (p *parallelHostChecker) UpdateAddresses(addrs []network.Address) {
	for _, addr := range addrs {
		if _, ok := p.active[addr]; ok {
			continue
		}
		fmt.Fprintf(p.stderr, "Attempting to connect to %s:22\n", addr.Value)
		closed := make(chan struct{})
		hc := &hostChecker{
			addr:            addr,
			client:          p.client,
			checkDelay:      p.checkDelay,
			checkHostScript: p.checkHostScript,
			closed:          closed,
			wg:              &p.wg,
		}
		p.wg.Add(1)
		p.active[addr] = closed
		p.Start(hc.loop)
	}
}

// Close prevents additional functions from being added to
// the Try, and tells each active hostChecker to exit.
func (p *parallelHostChecker) Close() error {
	// We signal each checker to stop and wait for them
	// each to complete; this allows us to get the error,
	// as opposed to when using try.Kill which does not
	// wait for the functions to complete.
	p.Try.Close()
	for _, ch := range p.active {
		close(ch)
	}
	return nil
}

// connectSSH is called to connect to the specified host and
// execute the "checkHostScript" bash script on it.
var connectSSH = func(client ssh.Client, host, checkHostScript string) error {
	cmd := client.Command("ubuntu@"+host, []string{"/bin/bash"}, nil)
	cmd.Stdin = strings.NewReader(checkHostScript)
	output, err := cmd.CombinedOutput()
	if err != nil && len(output) > 0 {
		err = fmt.Errorf("%s", strings.TrimSpace(string(output)))
	}
	return err
}
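
// connectSSH appears to be a package-level variable so that it can be
// replaced, e.g. by in-package tests (hostChecker.loop copies it for the
// same reason). A minimal, hypothetical stub a test might install:
//
//	connectSSH = func(client ssh.Client, host, checkHostScript string) error {
//		return nil // pretend every host is reachable and passes the nonce check
//	}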

// waitSSH waits for the instance to be assigned a routable
// address, then waits until we can connect to it via SSH.
//
// waitSSH attempts to connect to all addresses returned by the
// instance in parallel; the first to succeed wins. We ensure that
// private addresses are for the correct machine by checking for
// the presence of a file on the machine that contains the
// machine's nonce. The "checkHostScript" is a bash script
// that performs this file check.
func waitSSH(ctx environs.BootstrapContext, interrupted <-chan os.Signal, client ssh.Client, checkHostScript string, inst addresser, timeout config.SSHTimeoutOpts) (addr string, err error) {
	globalTimeout := time.After(timeout.Timeout)
	pollAddresses := time.NewTimer(0)

	// checker checks each address in a loop, in parallel,
	// until one succeeds, the global timeout is reached,
	// or the Try is killed.
	checker := parallelHostChecker{
		Try:             parallel.NewTry(0, nil),
		client:          client,
		stderr:          ctx.GetStderr(),
		active:          make(map[network.Address]chan struct{}),
		checkDelay:      timeout.RetryDelay,
		checkHostScript: checkHostScript,
	}
	defer checker.wg.Wait()
	defer checker.Kill()

	fmt.Fprintln(ctx.GetStderr(), "Waiting for address")
	for {
		select {
		case <-pollAddresses.C:
			pollAddresses.Reset(timeout.AddressesDelay)
			if err := inst.Refresh(); err != nil {
				return "", fmt.Errorf("refreshing addresses: %v", err)
			}
			addresses, err := inst.Addresses()
			if err != nil {
				return "", fmt.Errorf("getting addresses: %v", err)
			}
			checker.UpdateAddresses(addresses)
		case <-globalTimeout:
			checker.Close()
			lastErr := checker.Wait()
			format := "waited for %v "
			args := []interface{}{timeout.Timeout}
			if len(checker.active) == 0 {
				format += "without getting any addresses"
			} else {
				format += "without being able to connect"
			}
			if lastErr != nil && lastErr != parallel.ErrStopped {
				format += ": %v"
				args = append(args, lastErr)
			}
			return "", fmt.Errorf(format, args...)
		case <-interrupted:
			return "", fmt.Errorf("interrupted")
		case <-checker.Dead():
			result, err := checker.Result()
			if err != nil {
				return "", err
			}
			return result.(*hostChecker).addr.Value, nil
		}
	}
}
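
// waitSSH's schedule is driven entirely by the three durations in
// config.SSHTimeoutOpts used above (Timeout, AddressesDelay and RetryDelay).
// A hypothetical in-package call with a shortened schedule, assuming ctx,
// interrupted, checkHostScript and inst are already in scope; the literal
// values are illustrative:
//
//	opts := config.SSHTimeoutOpts{
//		Timeout:        time.Minute,
//		RetryDelay:     5 * time.Second,
//		AddressesDelay: 10 * time.Second,
//	}
//	addr, err := waitSSH(ctx, interrupted, ssh.DefaultClient, checkHostScript, inst, opts)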