github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachprod/vm/aws/aws.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package aws
    12  
    13  import (
    14  	"encoding/json"
    15  	"fmt"
    16  	"log"
    17  	"math/rand"
    18  	"os"
    19  	"os/exec"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/vm"
    24  	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/vm/flagstub"
    25  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    26  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    27  	"github.com/cockroachdb/errors"
    28  	"github.com/spf13/pflag"
    29  	"golang.org/x/sync/errgroup"
    30  	"golang.org/x/time/rate"
    31  )
    32  
// ProviderName is the name under which the AWS provider is registered in
// vm.Providers. It is also used as the prefix of every AWS-specific flag
// name registered by this package.
const ProviderName = "aws"
    35  
    36  // init will inject the AWS provider into vm.Providers, but only
    37  // if the aws tool is available on the local path.
    38  func init() {
    39  	const unimplemented = "please install the AWS CLI utilities " +
    40  		"(https://docs.aws.amazon.com/cli/latest/userguide/installing.html)"
    41  	var p vm.Provider = &Provider{}
    42  	if _, err := exec.LookPath("aws"); err == nil {
    43  		// NB: This is a bit hacky, but using something like `aws iam get-user` is
    44  		// slow and not something we want to do at startup.
    45  		haveCredentials := func() bool {
    46  			const credFile = "${HOME}/.aws/credentials"
    47  			if _, err := os.Stat(os.ExpandEnv(credFile)); err == nil {
    48  				return true
    49  			}
    50  			if os.Getenv("AWS_ACCESS_KEY_ID") != "" {
    51  				return true
    52  			}
    53  			return false
    54  		}
    55  
    56  		if !haveCredentials() {
    57  			p = flagstub.New(p, unimplemented)
    58  		}
    59  	} else {
    60  		p = flagstub.New(p, unimplemented)
    61  	}
    62  
    63  	vm.Providers[ProviderName] = p
    64  }
    65  
// providerOpts implements the vm.ProviderFlags interface for aws.Provider.
// Its fields are the destinations of the aws-prefixed command line flags.
type providerOpts struct {
	// Profile and Config are populated by ConfigureClusterFlags from the
	// aws-profile and aws-config flags respectively.
	Profile string
	Config  *awsConfig

	// The fields below are populated by ConfigureCreateFlags and are consumed
	// when instances are created.
	MachineType        string
	SSDMachineType     string
	CPUOptions         string
	RemoteUserName     string
	EBSVolumeType      string
	EBSVolumeSize      int
	EBSProvisionedIOPs int

	// CreateZones stores the list of zones used for cluster creation.
	// When > 1 zone specified, geo is automatically used, otherwise, geo depends
	// on the geo flag being set. If no zones specified, defaultCreateZones are
	// used. See defaultCreateZones.
	CreateZones []string
}
    85  
// Default values of the aws-machine-type-ssd and aws-machine-type flags.
// runInstance compares the flag values against these defaults to detect a
// machine type flag that would be silently ignored.
const (
	defaultSSDMachineType = "m5d.xlarge"
	defaultMachineType    = "m5.xlarge"
)
    90  
    91  var defaultConfig = func() (cfg *awsConfig) {
    92  	cfg = new(awsConfig)
    93  	if err := json.Unmarshal(MustAsset("config.json"), cfg); err != nil {
    94  		panic(errors.Wrap(err, "failed to embedded configuration"))
    95  	}
    96  	return cfg
    97  }()
    98  
// defaultCreateZones is the list of availability zones used for cluster
// creation when no zones are given via the aws-zones flag. If the geo flag is
// specified, nodes are distributed between these zones; otherwise Create
// places every node in a single zone taken from the region of the first entry.
var defaultCreateZones = []string{
	"us-east-2b",
	"us-west-2b",
	"eu-west-2b",
}
   107  
   108  // ConfigureCreateFlags is part of the vm.ProviderFlags interface.
   109  // This method sets up a lot of maps between the various EC2
   110  // regions and the ids of the things we want to use there.  This is
   111  // somewhat complicated because different EC2 regions may as well
   112  // be parallel universes.
   113  func (o *providerOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
   114  
   115  	// m5.xlarge is a 4core, 16Gb instance, approximately equal to a GCE n1-standard-4
   116  	flags.StringVar(&o.MachineType, ProviderName+"-machine-type", defaultMachineType,
   117  		"Machine type (see https://aws.amazon.com/ec2/instance-types/)")
   118  
   119  	// The m5 devices only support EBS volumes, so we need a different instance type
   120  	// for directly-attached SSD support. This is 4 core, 16GB ram, 150GB ssd.
   121  	flags.StringVar(&o.SSDMachineType, ProviderName+"-machine-type-ssd", defaultSSDMachineType,
   122  		"Machine type for --local-ssd (see https://aws.amazon.com/ec2/instance-types/)")
   123  
   124  	flags.StringVar(&o.CPUOptions, ProviderName+"-cpu-options", "",
   125  		"Options to specify number of cores and threads per core (see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-optimize-cpu.html#instance-specify-cpu-options)")
   126  
   127  	// AWS images generally use "ubuntu" or "ec2-user"
   128  	flags.StringVar(&o.RemoteUserName, ProviderName+"-user",
   129  		"ubuntu", "Name of the remote user to SSH as")
   130  
   131  	flags.StringVar(&o.EBSVolumeType, ProviderName+"-ebs-volume-type",
   132  		"gp2", "Type of the EBS volume, only used if local-ssd=false")
   133  	flags.IntVar(&o.EBSVolumeSize, ProviderName+"-ebs-volume-size",
   134  		500, "Size in GB of EBS volume, only used if local-ssd=false")
   135  	flags.IntVar(&o.EBSProvisionedIOPs, ProviderName+"-ebs-iops",
   136  		1000, "Number of IOPs to provision, only used if "+ProviderName+
   137  			"-ebs-volume-type=io1")
   138  
   139  	flags.StringSliceVar(&o.CreateZones, ProviderName+"-zones", nil,
   140  		fmt.Sprintf("aws availability zones to use for cluster creation. If zones are formatted\n"+
   141  			"as AZ:N where N is an integer, the zone will be repeated N times. If > 1\n"+
   142  			"zone specified, the cluster will be spread out evenly by zone regardless\n"+
   143  			"of geo (default [%s])", strings.Join(defaultCreateZones, ",")))
   144  }
   145  
   146  func (o *providerOpts) ConfigureClusterFlags(flags *pflag.FlagSet, _ vm.MultipleProjectsOption) {
   147  	profile := os.Getenv("AWS_DEFAULT_PROFILE") // "" if unset
   148  	flags.StringVar(&o.Profile, ProviderName+"-profile", profile,
   149  		"Profile to manage cluster in")
   150  	configFlagVal := awsConfigValue{awsConfig: *defaultConfig}
   151  	o.Config = &configFlagVal.awsConfig
   152  	flags.Var(&configFlagVal, ProviderName+"-config",
   153  		"Path to json for aws configuration, defaults to predefined configuration")
   154  }
   155  
// Provider implements the vm.Provider interface for AWS.
type Provider struct {
	// opts holds the flag-configured options; it is exposed through Flags.
	opts providerOpts
}
   160  
// CleanSSH is part of vm.Provider.  This implementation is a no-op,
// since we depend on the user's local identity file. It always returns nil.
func (p *Provider) CleanSSH() error {
	return nil
}
   166  
   167  // ConfigSSH ensures that for each region we're operating in, we have
   168  // a <user>-<hash> keypair where <hash> is a hash of the public key.
   169  // We use a hash since a user probably has multiple machines they're
   170  // running roachprod on and these machines (ought to) have separate
   171  // ssh keypairs.  If the remote keypair doesn't exist, we'll upload
   172  // the user's ~/.ssh/id_rsa.pub file or ask them to generate one.
   173  func (p *Provider) ConfigSSH() error {
   174  	keyName, err := p.sshKeyName()
   175  	if err != nil {
   176  		return err
   177  	}
   178  
   179  	regions, err := p.allRegions(p.opts.Config.availabilityZoneNames())
   180  	if err != nil {
   181  		return err
   182  	}
   183  
   184  	var g errgroup.Group
   185  	for _, r := range regions {
   186  		// capture loop variable
   187  		region := r
   188  		g.Go(func() error {
   189  			exists, err := p.sshKeyExists(keyName, region)
   190  			if err != nil {
   191  				return err
   192  			}
   193  			if !exists {
   194  				err = p.sshKeyImport(keyName, region)
   195  				if err != nil {
   196  					return err
   197  				}
   198  				log.Printf("imported %s as %s in region %s",
   199  					sshPublicKeyFile, keyName, region)
   200  			}
   201  			return nil
   202  		})
   203  	}
   204  
   205  	return g.Wait()
   206  }
   207  
   208  // Create is part of the vm.Provider interface.
   209  func (p *Provider) Create(names []string, opts vm.CreateOpts) error {
   210  	// We need to make sure that the SSH keys have been distributed to all regions
   211  	if err := p.ConfigSSH(); err != nil {
   212  		return err
   213  	}
   214  
   215  	expandedZones, err := vm.ExpandZonesFlag(p.opts.CreateZones)
   216  	if err != nil {
   217  		return err
   218  	}
   219  
   220  	useDefaultZones := len(expandedZones) == 0
   221  	if useDefaultZones {
   222  		expandedZones = defaultCreateZones
   223  	}
   224  
   225  	regions, err := p.allRegions(expandedZones)
   226  	if err != nil {
   227  		return err
   228  	}
   229  	if len(regions) < 1 {
   230  		return errors.Errorf("Please specify a valid region.")
   231  	}
   232  
   233  	var zones []string // contains an az corresponding to each entry in names
   234  	if !opts.GeoDistributed && (useDefaultZones || len(expandedZones) == 1) {
   235  		// Only use one zone in the region if we're not creating a geo cluster.
   236  		regionZones, err := p.regionZones(regions[0], expandedZones)
   237  		if err != nil {
   238  			return err
   239  		}
   240  		// Select a random AZ from the first region.
   241  		zone := regionZones[rand.Intn(len(regionZones))]
   242  		for range names {
   243  			zones = append(zones, zone)
   244  		}
   245  	} else {
   246  		// Distribute the nodes amongst availability zones if geo distributed.
   247  		nodeZones := vm.ZonePlacement(len(expandedZones), len(names))
   248  		zones = make([]string, len(nodeZones))
   249  		for i, z := range nodeZones {
   250  			zones[i] = expandedZones[z]
   251  		}
   252  	}
   253  	var g errgroup.Group
   254  	const rateLimit = 2 // per second
   255  	limiter := rate.NewLimiter(rateLimit, 2 /* buckets */)
   256  	for i := range names {
   257  		capName := names[i]
   258  		placement := zones[i]
   259  		res := limiter.Reserve()
   260  		g.Go(func() error {
   261  			time.Sleep(res.Delay())
   262  			return p.runInstance(capName, placement, opts)
   263  		})
   264  	}
   265  	if err := g.Wait(); err != nil {
   266  		return err
   267  	}
   268  	return p.waitForIPs(names, regions)
   269  }
   270  
   271  // waitForIPs waits until AWS reports both internal and external IP addresses
   272  // for all newly created VMs. If we did not wait for these IPs then attempts to
   273  // list the new VMs after the creation might find VMs without an external IP.
   274  // We do a bad job at higher layers detecting this lack of IP which can lead to
   275  // commands hanging indefinitely.
   276  func (p *Provider) waitForIPs(names []string, regions []string) error {
   277  	waitForIPRetry := retry.Start(retry.Options{
   278  		InitialBackoff: 100 * time.Millisecond,
   279  		MaxBackoff:     500 * time.Millisecond,
   280  		MaxRetries:     120, // wait a bit less than 90s for IPs
   281  	})
   282  	makeNameSet := func() map[string]struct{} {
   283  		m := make(map[string]struct{}, len(names))
   284  		for _, n := range names {
   285  			m[n] = struct{}{}
   286  		}
   287  		return m
   288  	}
   289  	for waitForIPRetry.Next() {
   290  		vms, err := p.listRegions(regions)
   291  		if err != nil {
   292  			return err
   293  		}
   294  		nameSet := makeNameSet()
   295  		for _, vm := range vms {
   296  			if vm.PublicIP != "" && vm.PrivateIP != "" {
   297  				delete(nameSet, vm.Name)
   298  			}
   299  		}
   300  		if len(nameSet) == 0 {
   301  			return nil
   302  		}
   303  	}
   304  	return fmt.Errorf("failed to retrieve IPs for all vms")
   305  }
   306  
   307  // Delete is part of vm.Provider.
   308  // This will delete all instances in a single AWS command.
   309  func (p *Provider) Delete(vms vm.List) error {
   310  	byRegion, err := regionMap(vms)
   311  	if err != nil {
   312  		return err
   313  	}
   314  	g := errgroup.Group{}
   315  	for region, list := range byRegion {
   316  		args := []string{
   317  			"ec2", "terminate-instances",
   318  			"--region", region,
   319  			"--instance-ids",
   320  		}
   321  		args = append(args, list.ProviderIDs()...)
   322  		g.Go(func() error {
   323  			var data struct {
   324  				TerminatingInstances []struct {
   325  					InstanceID string `json:"InstanceId"`
   326  				}
   327  			}
   328  			_ = data.TerminatingInstances // silence unused warning
   329  			if len(data.TerminatingInstances) > 0 {
   330  				_ = data.TerminatingInstances[0].InstanceID // silence unused warning
   331  			}
   332  			return p.runJSONCommand(args, &data)
   333  		})
   334  	}
   335  	return g.Wait()
   336  }
   337  
   338  // Extend is part of the vm.Provider interface.
   339  // This will update the Lifetime tag on the instances.
   340  func (p *Provider) Extend(vms vm.List, lifetime time.Duration) error {
   341  	byRegion, err := regionMap(vms)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	g := errgroup.Group{}
   346  	for region, list := range byRegion {
   347  		// Capture loop vars here
   348  		args := []string{
   349  			"ec2", "create-tags",
   350  			"--region", region,
   351  			"--tags", "Key=Lifetime,Value=" + lifetime.String(),
   352  			"--resources",
   353  		}
   354  		args = append(args, list.ProviderIDs()...)
   355  
   356  		g.Go(func() error {
   357  			_, err := p.runCommand(args)
   358  			return err
   359  		})
   360  	}
   361  	return g.Wait()
   362  }
   363  
   364  // cachedActiveAccount memoizes the return value from FindActiveAccount
   365  var cachedActiveAccount string
   366  
   367  // FindActiveAccount is part of the vm.Provider interface.
   368  // This queries the AWS command for the current IAM user or role.
   369  func (p *Provider) FindActiveAccount() (string, error) {
   370  	if len(cachedActiveAccount) > 0 {
   371  		return cachedActiveAccount, nil
   372  	}
   373  	var account string
   374  	var err error
   375  	if p.opts.Profile == "" {
   376  		account, err = p.iamGetUser()
   377  		if err != nil {
   378  			return "", err
   379  		}
   380  	} else {
   381  		account, err = p.stsGetCallerIdentity()
   382  		if err != nil {
   383  			return "", err
   384  		}
   385  	}
   386  	cachedActiveAccount = account
   387  	return cachedActiveAccount, nil
   388  }
   389  
   390  // iamGetUser returns the identity of an IAM user.
   391  func (p *Provider) iamGetUser() (string, error) {
   392  	var userInfo struct {
   393  		User struct {
   394  			UserName string
   395  		}
   396  	}
   397  	args := []string{"iam", "get-user"}
   398  	err := p.runJSONCommand(args, &userInfo)
   399  	if err != nil {
   400  		return "", err
   401  	}
   402  	if userInfo.User.UserName == "" {
   403  		return "", errors.Errorf("username not configured. run 'aws iam get-user'")
   404  	}
   405  	return userInfo.User.UserName, nil
   406  }
   407  
   408  // stsGetCallerIdentity returns the identity of a user assuming a role
   409  // into the account.
   410  func (p *Provider) stsGetCallerIdentity() (string, error) {
   411  	var userInfo struct {
   412  		Arn string
   413  	}
   414  	args := []string{"sts", "get-caller-identity"}
   415  	err := p.runJSONCommand(args, &userInfo)
   416  	if err != nil {
   417  		return "", err
   418  	}
   419  	s := strings.Split(userInfo.Arn, "/")
   420  	if len(s) < 2 {
   421  		return "", errors.Errorf("Could not parse caller identity ARN '%s'", userInfo.Arn)
   422  	}
   423  	return s[1], nil
   424  }
   425  
// Flags is part of the vm.Provider interface. It returns a pointer to the
// provider's own options, so flags registered through it populate p.opts.
func (p *Provider) Flags() vm.ProviderFlags {
	return &p.opts
}
   430  
   431  // List is part of the vm.Provider interface.
   432  func (p *Provider) List() (vm.List, error) {
   433  	regions, err := p.allRegions(p.opts.Config.availabilityZoneNames())
   434  	if err != nil {
   435  		return nil, err
   436  	}
   437  	return p.listRegions(regions)
   438  }
   439  
   440  func (p *Provider) listRegions(regions []string) (vm.List, error) {
   441  	var ret vm.List
   442  	var mux syncutil.Mutex
   443  	var g errgroup.Group
   444  
   445  	for _, r := range regions {
   446  		// capture loop variable
   447  		region := r
   448  		g.Go(func() error {
   449  			vms, err := p.listRegion(region)
   450  			if err != nil {
   451  				return err
   452  			}
   453  			mux.Lock()
   454  			ret = append(ret, vms...)
   455  			mux.Unlock()
   456  			return nil
   457  		})
   458  	}
   459  
   460  	if err := g.Wait(); err != nil {
   461  		return nil, err
   462  	}
   463  
   464  	return ret, nil
   465  }
   466  
// Name is part of the vm.Provider interface. This returns ProviderName,
// i.e. "aws".
func (p *Provider) Name() string {
	return ProviderName
}
   471  
   472  // allRegions returns the regions that have been configured with
   473  // AMI and SecurityGroup instances.
   474  func (p *Provider) allRegions(zones []string) (regions []string, err error) {
   475  	byName := make(map[string]struct{})
   476  	for _, z := range zones {
   477  		az := p.opts.Config.getAvailabilityZone(z)
   478  		if az == nil {
   479  			return nil, fmt.Errorf("unknown availability zone %v, please provide a "+
   480  				"correct value or update your config accordingly", z)
   481  		}
   482  		if _, have := byName[az.region.Name]; !have {
   483  			byName[az.region.Name] = struct{}{}
   484  			regions = append(regions, az.region.Name)
   485  		}
   486  	}
   487  	return regions, nil
   488  }
   489  
   490  // regionZones returns all AWS availability zones which have been correctly
   491  // configured within the given region.
   492  func (p *Provider) regionZones(region string, allZones []string) (zones []string, _ error) {
   493  	r := p.opts.Config.getRegion(region)
   494  	if r == nil {
   495  		return nil, fmt.Errorf("region %s not found", region)
   496  	}
   497  	for _, z := range allZones {
   498  		for _, az := range r.AvailabilityZones {
   499  			if az.name == z {
   500  				zones = append(zones, z)
   501  				break
   502  			}
   503  		}
   504  	}
   505  	return zones, nil
   506  }
   507  
   508  // listRegion extracts the roachprod-managed instances in the
   509  // given region.
   510  func (p *Provider) listRegion(region string) (vm.List, error) {
   511  	var data struct {
   512  		Reservations []struct {
   513  			Instances []struct {
   514  				InstanceID string `json:"InstanceId"`
   515  				LaunchTime string
   516  				Placement  struct {
   517  					AvailabilityZone string
   518  				}
   519  				PrivateDNSName   string `json:"PrivateDnsName"`
   520  				PrivateIPAddress string `json:"PrivateIpAddress"`
   521  				PublicDNSName    string `json:"PublicDnsName"`
   522  				PublicIPAddress  string `json:"PublicIpAddress"`
   523  				State            struct {
   524  					Code int
   525  					Name string
   526  				}
   527  				Tags []struct {
   528  					Key   string
   529  					Value string
   530  				}
   531  				VpcID        string `json:"VpcId"`
   532  				InstanceType string
   533  			}
   534  		}
   535  	}
   536  	args := []string{
   537  		"ec2", "describe-instances",
   538  		"--region", region,
   539  	}
   540  	err := p.runJSONCommand(args, &data)
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  
   545  	var ret vm.List
   546  	for _, res := range data.Reservations {
   547  	in:
   548  		for _, in := range res.Instances {
   549  			// Ignore any instances that are not pending or running
   550  			if in.State.Name != "pending" && in.State.Name != "running" {
   551  				continue in
   552  			}
   553  			_ = in.PublicDNSName // silence unused warning
   554  			_ = in.State.Code    // silence unused warning
   555  
   556  			// Convert the tag map into a more useful representation
   557  			tagMap := make(map[string]string, len(in.Tags))
   558  			for _, entry := range in.Tags {
   559  				tagMap[entry.Key] = entry.Value
   560  			}
   561  			// Ignore any instances that we didn't create
   562  			if tagMap["Roachprod"] != "true" {
   563  				continue in
   564  			}
   565  
   566  			var errs []error
   567  			createdAt, err := time.Parse(time.RFC3339, in.LaunchTime)
   568  			if err != nil {
   569  				errs = append(errs, vm.ErrNoExpiration)
   570  			}
   571  
   572  			var lifetime time.Duration
   573  			if lifeText, ok := tagMap["Lifetime"]; ok {
   574  				lifetime, err = time.ParseDuration(lifeText)
   575  				if err != nil {
   576  					errs = append(errs, err)
   577  				}
   578  			} else {
   579  				errs = append(errs, vm.ErrNoExpiration)
   580  			}
   581  
   582  			m := vm.VM{
   583  				CreatedAt:   createdAt,
   584  				DNS:         in.PrivateDNSName,
   585  				Name:        tagMap["Name"],
   586  				Errors:      errs,
   587  				Lifetime:    lifetime,
   588  				PrivateIP:   in.PrivateIPAddress,
   589  				Provider:    ProviderName,
   590  				ProviderID:  in.InstanceID,
   591  				PublicIP:    in.PublicIPAddress,
   592  				RemoteUser:  p.opts.RemoteUserName,
   593  				VPC:         in.VpcID,
   594  				MachineType: in.InstanceType,
   595  				Zone:        in.Placement.AvailabilityZone,
   596  			}
   597  			ret = append(ret, m)
   598  		}
   599  	}
   600  
   601  	return ret, nil
   602  }
   603  
   604  // runInstance is responsible for allocating a single ec2 vm.
   605  // Given that every AWS region may as well be a parallel dimension,
   606  // we need to do a bit of work to look up all of the various ids that
   607  // we need in order to actually allocate an instance.
   608  func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) error {
   609  	// There exist different flags to control the machine type when ssd is true.
   610  	// This enables sane defaults for either setting but the behavior can be
   611  	// confusing when a user attempts to use `--aws-machine-type` and the command
   612  	// succeeds but the flag is ignored. Rather than permit this behavior we
   613  	// return an error instructing the user to use the other flag.
   614  	if opts.SSDOpts.UseLocalSSD &&
   615  		p.opts.MachineType != defaultMachineType &&
   616  		p.opts.SSDMachineType == defaultSSDMachineType {
   617  		return errors.Errorf("use the --aws-machine-type-ssd flag to set the " +
   618  			"machine type when --local-ssd=true")
   619  	} else if !opts.SSDOpts.UseLocalSSD &&
   620  		p.opts.MachineType == defaultMachineType &&
   621  		p.opts.SSDMachineType != defaultSSDMachineType {
   622  		return errors.Errorf("use the --aws-machine-type flag to set the " +
   623  			"machine type when --local-ssd=false")
   624  	}
   625  
   626  	az, ok := p.opts.Config.azByName[zone]
   627  	if !ok {
   628  		return fmt.Errorf("no region in %v corresponds to availability zone %v",
   629  			p.opts.Config.regionNames(), zone)
   630  	}
   631  
   632  	keyName, err := p.sshKeyName()
   633  	if err != nil {
   634  		return err
   635  	}
   636  
   637  	var machineType string
   638  	if opts.SSDOpts.UseLocalSSD {
   639  		machineType = p.opts.SSDMachineType
   640  	} else {
   641  		machineType = p.opts.MachineType
   642  	}
   643  
   644  	cpuOptions := p.opts.CPUOptions
   645  
   646  	// We avoid the need to make a second call to set the tags by jamming
   647  	// all of our metadata into the TagSpec.
   648  	tagSpecs := fmt.Sprintf(
   649  		"ResourceType=instance,Tags=["+
   650  			"{Key=Lifetime,Value=%s},"+
   651  			"{Key=Name,Value=%s},"+
   652  			"{Key=Roachprod,Value=true},"+
   653  			"]", opts.Lifetime, name)
   654  
   655  	var data struct {
   656  		Instances []struct {
   657  			InstanceID string `json:"InstanceId"`
   658  		}
   659  	}
   660  	_ = data.Instances // silence unused warning
   661  	if len(data.Instances) > 0 {
   662  		_ = data.Instances[0].InstanceID // silence unused warning
   663  	}
   664  
   665  	// Create AWS startup script file.
   666  	extraMountOpts := ""
   667  	// Dynamic args.
   668  	if opts.SSDOpts.UseLocalSSD {
   669  		if opts.SSDOpts.NoExt4Barrier {
   670  			extraMountOpts = "nobarrier"
   671  		}
   672  	}
   673  	filename, err := writeStartupScript(extraMountOpts)
   674  	if err != nil {
   675  		return errors.Wrapf(err, "could not write AWS startup script to temp file")
   676  	}
   677  	defer func() {
   678  		_ = os.Remove(filename)
   679  	}()
   680  
   681  	args := []string{
   682  		"ec2", "run-instances",
   683  		"--associate-public-ip-address",
   684  		"--count", "1",
   685  		"--image-id", az.region.AMI,
   686  		"--instance-type", machineType,
   687  		"--key-name", keyName,
   688  		"--region", az.region.Name,
   689  		"--security-group-ids", az.region.SecurityGroup,
   690  		"--subnet-id", az.subnetID,
   691  		"--tag-specifications", tagSpecs,
   692  		"--user-data", "file://" + filename,
   693  	}
   694  	if cpuOptions != "" {
   695  		args = append(args, "--cpu-options", cpuOptions)
   696  	}
   697  
   698  	// The local NVMe devices are automatically mapped.  Otherwise, we need to map an EBS data volume.
   699  	if !opts.SSDOpts.UseLocalSSD {
   700  		var ebsParams string
   701  		switch t := p.opts.EBSVolumeType; t {
   702  		case "gp2":
   703  			ebsParams = fmt.Sprintf("{VolumeSize=%d,VolumeType=%s,DeleteOnTermination=true}",
   704  				p.opts.EBSVolumeSize, t)
   705  		case "io1":
   706  			ebsParams = fmt.Sprintf("{VolumeSize=%d,VolumeType=%s,Iops=%d,DeleteOnTermination=true}",
   707  				p.opts.EBSVolumeSize, t, p.opts.EBSProvisionedIOPs)
   708  		default:
   709  			return errors.Errorf("Unknown EBS volume type %s", t)
   710  		}
   711  		args = append(args,
   712  			"--block-device-mapping",
   713  			// Size is measured in GB.  gp2 type derives guaranteed iops from size.
   714  			"DeviceName=/dev/sdd,Ebs="+ebsParams,
   715  		)
   716  	}
   717  
   718  	return p.runJSONCommand(args, &data)
   719  }
   720  
// Active is part of the vm.Provider interface. This implementation
// unconditionally returns true.
func (p *Provider) Active() bool {
	return true
}