github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachprod/vm/vm.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package vm
    12  
    13  import (
    14  	"fmt"
    15  	"log"
    16  	"regexp"
    17  	"strconv"
    18  	"strings"
    19  	"time"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/config"
    22  	"github.com/cockroachdb/errors"
    23  	"github.com/spf13/pflag"
    24  	"golang.org/x/sync/errgroup"
    25  )
    26  
// A VM is an abstract representation of a specific machine instance. This type
// is used across the various cloud providers supported by roachprod. All
// fields are exported and carry JSON tags.
type VM struct {
	// Name is the name of the VM; List.Less orders VMs by this field.
	// CreatedAt is presumably the creation time reported by the provider
	// (it is populated outside this file -- TODO confirm).
	Name      string    `json:"name"`
	CreatedAt time.Time `json:"created_at"`
	// If non-empty, indicates that some or all of the data in the VM instance
	// is not present or otherwise invalid. The Err* values declared in this
	// package are intended to be stored here.
	Errors   []error       `json:"errors"`
	Lifetime time.Duration `json:"lifetime"`
	// The provider-internal DNS name for the VM instance
	DNS string `json:"dns"`
	// The name of the cloud provider that hosts the VM instance. FanOut uses
	// this value as the key into the Providers map.
	Provider string `json:"provider"`
	// The provider-specific id for the instance.  This may or may not be the same as Name, depending
	// on whether or not the cloud provider automatically assigns VM identifiers.
	ProviderID string `json:"provider_id"`
	PrivateIP  string `json:"private_ip"`
	PublicIP   string `json:"public_ip"`
	// The username that should be used to connect to the VM.
	RemoteUser string `json:"remote_user"`
	// The VPC value defines an equivalency set for VMs that can route
	// to one another via private IP addresses.  We use this later on
	// when determining whether or not cluster member should advertise
	// their public or private IP.
	//
	// Zone (below) equals config.Local for the local host (see IsLocal) and
	// is otherwise parsed by Locality to derive the region.
	VPC         string `json:"vpc"`
	MachineType string `json:"machine_type"`
	Zone        string `json:"zone"`
	// Project represents the project to which this vm belongs, if the VM is in a
	// cloud that supports project (i.e. GCE). Empty otherwise.
	Project string `json:"project"`
}
    58  
    59  // Name generates the name for the i'th node in a cluster.
    60  func Name(cluster string, idx int) string {
    61  	return fmt.Sprintf("%s-%0.4d", cluster, idx)
    62  }
    63  
// Error values for VM.Errors
var (
	ErrBadNetwork   = errors.New("could not determine network information")
	ErrInvalidName  = errors.New("invalid VM name")
	ErrNoExpiration = errors.New("could not determine expiration")
)

// regionRE extracts the region from a zone name. Its single capture group is
// everything before the trailing lowercase zone letter and the optional dash
// in front of it, e.g. "us-east1-b" -> "us-east1" and "us-east-1a" ->
// "us-east-1".
var regionRE = regexp.MustCompile(`(.*[^-])-?[a-z]$`)
    72  
    73  // IsLocal returns true if the VM represents the local host.
    74  func (vm *VM) IsLocal() bool {
    75  	return vm.Zone == config.Local
    76  }
    77  
    78  // Locality returns the cloud, region, and zone for the VM.  We want to include the cloud, since
    79  // GCE and AWS use similarly-named regions (e.g. us-east-1)
    80  func (vm *VM) Locality() string {
    81  	var region string
    82  	if vm.IsLocal() {
    83  		region = vm.Zone
    84  	} else if match := regionRE.FindStringSubmatch(vm.Zone); len(match) == 2 {
    85  		region = match[1]
    86  	} else {
    87  		log.Fatalf("unable to parse region from zone %q", vm.Zone)
    88  	}
    89  	return fmt.Sprintf("cloud=%s,region=%s,zone=%s", vm.Provider, region, vm.Zone)
    90  }
    91  
    92  // List represents a list of VMs.
    93  type List []VM
    94  
    95  func (vl List) Len() int           { return len(vl) }
    96  func (vl List) Swap(i, j int)      { vl[i], vl[j] = vl[j], vl[i] }
    97  func (vl List) Less(i, j int) bool { return vl[i].Name < vl[j].Name }
    98  
    99  // Names sxtracts all VM.Name entries from the List
   100  func (vl List) Names() []string {
   101  	ret := make([]string, len(vl))
   102  	for i, vm := range vl {
   103  		ret[i] = vm.Name
   104  	}
   105  	return ret
   106  }
   107  
   108  // ProviderIDs extracts all ProviderID values from the List.
   109  func (vl List) ProviderIDs() []string {
   110  	ret := make([]string, len(vl))
   111  	for i, vm := range vl {
   112  		ret[i] = vm.ProviderID
   113  	}
   114  	return ret
   115  }
   116  
// CreateOpts is the set of options when creating VMs. It is the second
// argument of Provider.Create, next to the names of the VMs to create.
type CreateOpts struct {
	// NOTE(review): the exact interpretation of the following four fields is
	// up to the Provider implementations, which are not part of this file.
	ClusterName    string
	Lifetime       time.Duration
	GeoDistributed bool
	VMProviders    []string
	// SSDOpts groups the options related to local SSDs.
	SSDOpts        struct {
		UseLocalSSD bool
		// NoExt4Barrier, if set, makes the "-o nobarrier" flag be used when
		// mounting the SSD. Ignored if UseLocalSSD is not set.
		NoExt4Barrier bool
	}
}
   130  
   131  // MultipleProjectsOption is used to specify whether a command accepts multiple
   132  // values for the --gce-project flag.
   133  type MultipleProjectsOption bool
   134  
   135  const (
   136  	// SingleProject means that a single project is accepted.
   137  	SingleProject MultipleProjectsOption = false
   138  	// AcceptMultipleProjects means that multiple projects are supported.
   139  	AcceptMultipleProjects = true
   140  )
   141  
// ProviderFlags is a hook point for Providers to supply additional,
// provider-specific flags to various roachprod commands. In general, the flags
// should be prefixed with the provider's name to prevent collision between
// similar options.
//
// If a new command is added (perhaps `roachprod enlarge`) that needs
// additional provider-specific flags, add a similarly-named method
// `ConfigureEnlargeFlags` to mix in the additional flags.
//
// A Provider exposes its ProviderFlags through Provider.Flags.
type ProviderFlags interface {
	// Configures a FlagSet with any options relevant to the `create` command.
	ConfigureCreateFlags(*pflag.FlagSet)
	// Configures a FlagSet with any options relevant to cluster manipulation
	// commands (`create`, `destroy`, `list`, `sync` and `gc`). The
	// MultipleProjectsOption states whether the command accepts multiple
	// values for the --gce-project flag.
	ConfigureClusterFlags(*pflag.FlagSet, MultipleProjectsOption)
}
   157  
// A Provider is a source of virtual machines running on some hosting platform.
// Implementations are registered in the Providers map under their Name.
type Provider interface {
	// NOTE(review): CleanSSH and ConfigSSH presumably manage SSH
	// configuration for the provider's VMs; their exact effect is defined by
	// the implementations, which are not part of this file.
	CleanSSH() error
	ConfigSSH() error
	// Create takes the names of the VMs to create and the creation options.
	Create(names []string, opts CreateOpts) error
	// Delete takes the list of VMs to delete.
	Delete(vms List) error
	// Extend takes a list of VMs and a lifetime to apply to them.
	Extend(vms List, lifetime time.Duration) error
	// Return the account name associated with the provider. FindActiveAccounts
	// skips providers that return an empty name.
	FindActiveAccount() (string, error)
	// Returns a hook point for extending top-level roachprod tooling flags
	Flags() ProviderFlags
	// List returns the provider's VMs.
	List() (List, error)
	// The name of the Provider, which will also surface in the top-level Providers map.
	Name() string

	// Active returns true if the provider is properly installed and capable of
	// operating, false if it's just a stub. This allows one to test whether a
	// particular provider is functioning properly by doing, for example,
	// Providers[gce.ProviderName].Active(). Note that just looking at
	// Providers[gce.ProviderName] != nil doesn't work because
	// Providers[gce.ProviderName] can be a stub.
	Active() bool
}
   181  
// DeleteCluster is an optional capability for a Provider which can
// destroy an entire cluster in a single operation. Because it is not part of
// the Provider interface, callers have to discover it at run time (e.g. with
// a type assertion on a Provider value).
type DeleteCluster interface {
	// DeleteCluster destroys the cluster with the given name.
	DeleteCluster(name string) error
}
   187  
   188  // Providers contains all known Provider instances. This is initialized by subpackage init() functions.
   189  var Providers = map[string]Provider{}
   190  
   191  // AllProviderNames returns the names of all known vm Providers.  This is useful with the
   192  // ProvidersSequential or ProvidersParallel methods.
   193  func AllProviderNames() []string {
   194  	var ret []string
   195  	for name := range Providers {
   196  		ret = append(ret, name)
   197  	}
   198  	return ret
   199  }
   200  
   201  // FanOut collates a collection of VMs by their provider and invoke the callbacks in parallel.
   202  func FanOut(list List, action func(Provider, List) error) error {
   203  	var m = map[string]List{}
   204  	for _, vm := range list {
   205  		m[vm.Provider] = append(m[vm.Provider], vm)
   206  	}
   207  
   208  	var g errgroup.Group
   209  	for name, vms := range m {
   210  		// capture loop variables
   211  		n := name
   212  		v := vms
   213  		g.Go(func() error {
   214  			p, ok := Providers[n]
   215  			if !ok {
   216  				return errors.Errorf("unknown provider name: %s", n)
   217  			}
   218  			return action(p, v)
   219  		})
   220  	}
   221  
   222  	return g.Wait()
   223  }
   224  
   225  // Memoizes return value from FindActiveAccounts.
   226  var cachedActiveAccounts map[string]string
   227  
   228  // FindActiveAccounts queries the active providers for the name of the user
   229  // account.
   230  func FindActiveAccounts() (map[string]string, error) {
   231  	source := cachedActiveAccounts
   232  
   233  	if source == nil {
   234  		// Ask each Provider for its active account name.
   235  		source = map[string]string{}
   236  		err := ProvidersSequential(AllProviderNames(), func(p Provider) error {
   237  			account, err := p.FindActiveAccount()
   238  			if err != nil {
   239  				return err
   240  			}
   241  			if len(account) > 0 {
   242  				source[p.Name()] = account
   243  			}
   244  			return nil
   245  		})
   246  		if err != nil {
   247  			return nil, err
   248  		}
   249  		cachedActiveAccounts = source
   250  	}
   251  
   252  	// Return a copy.
   253  	ret := make(map[string]string, len(source))
   254  	for k, v := range source {
   255  		ret[k] = v
   256  	}
   257  
   258  	return ret, nil
   259  }
   260  
   261  // ForProvider resolves the Provider with the given name and executes the
   262  // action.
   263  func ForProvider(named string, action func(Provider) error) error {
   264  	p, ok := Providers[named]
   265  	if !ok {
   266  		return errors.Errorf("unknown vm provider: %s", named)
   267  	}
   268  	if err := action(p); err != nil {
   269  		return errors.Wrapf(err, "in provider: %s", named)
   270  	}
   271  	return nil
   272  }
   273  
   274  // ProvidersParallel concurrently executes actions for each named Provider.
   275  func ProvidersParallel(named []string, action func(Provider) error) error {
   276  	var g errgroup.Group
   277  	for _, name := range named {
   278  		// capture loop variable
   279  		n := name
   280  		g.Go(func() error {
   281  			return ForProvider(n, action)
   282  		})
   283  	}
   284  	return g.Wait()
   285  }
   286  
   287  // ProvidersSequential sequentially executes actions for each named Provider.
   288  func ProvidersSequential(named []string, action func(Provider) error) error {
   289  	for _, name := range named {
   290  		if err := ForProvider(name, action); err != nil {
   291  			return err
   292  		}
   293  	}
   294  	return nil
   295  }
   296  
   297  // ZonePlacement allocates zones to numNodes in an equally sized groups in the
   298  // same order as zones. If numNodes is not divisible by len(zones) the remainder
   299  // is allocated in a round-robin fashion and placed at the end of the returned
   300  // slice. The returned slice has a length of numNodes where each value is in
   301  // [0, numZones).
   302  //
   303  // For example:
   304  //
   305  //   ZonePlacement(3, 8) = []int{0, 0, 1, 1, 2, 2, 0, 1}
   306  //
   307  func ZonePlacement(numZones, numNodes int) (nodeZones []int) {
   308  	numPerZone := numNodes / numZones
   309  	extraStartIndex := numPerZone * numZones
   310  	nodeZones = make([]int, numNodes)
   311  	for i := 0; i < numNodes; i++ {
   312  		nodeZones[i] = i / numPerZone
   313  		if i >= extraStartIndex {
   314  			nodeZones[i] = i % numZones
   315  		}
   316  	}
   317  	return nodeZones
   318  }
   319  
   320  // ExpandZonesFlag takes a slice of strings which may be of the format
   321  // zone:N which implies that a given zone should be repeated N times and
   322  // expands it. For example ["us-west1-b:2", "us-east1-a:2"] will expand to
   323  // ["us-west1-b", "us-west1-b", "us-east1-a", "us-east1-a"].
   324  func ExpandZonesFlag(zoneFlag []string) (zones []string, err error) {
   325  	for _, zone := range zoneFlag {
   326  		colonIdx := strings.Index(zone, ":")
   327  		if colonIdx == -1 {
   328  			zones = append(zones, zone)
   329  			continue
   330  		}
   331  		n, err := strconv.Atoi(zone[colonIdx+1:])
   332  		if err != nil {
   333  			return zones, fmt.Errorf("failed to parse %q: %v", zone, err)
   334  		}
   335  		for i := 0; i < n; i++ {
   336  			zones = append(zones, zone[:colonIdx])
   337  		}
   338  	}
   339  	return zones, nil
   340  }