github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachprod/vm/vm.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package vm 12 13 import ( 14 "fmt" 15 "log" 16 "regexp" 17 "strconv" 18 "strings" 19 "time" 20 21 "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/config" 22 "github.com/cockroachdb/errors" 23 "github.com/spf13/pflag" 24 "golang.org/x/sync/errgroup" 25 ) 26 27 // A VM is an abstract representation of a specific machine instance. This type is used across 28 // the various cloud providers supported by roachprod. 29 type VM struct { 30 Name string `json:"name"` 31 CreatedAt time.Time `json:"created_at"` 32 // If non-empty, indicates that some or all of the data in the VM instance 33 // is not present or otherwise invalid. 34 Errors []error `json:"errors"` 35 Lifetime time.Duration `json:"lifetime"` 36 // The provider-internal DNS name for the VM instance 37 DNS string `json:"dns"` 38 // The name of the cloud provider that hosts the VM instance 39 Provider string `json:"provider"` 40 // The provider-specific id for the instance. This may or may not be the same as Name, depending 41 // on whether or not the cloud provider automatically assigns VM identifiers. 42 ProviderID string `json:"provider_id"` 43 PrivateIP string `json:"private_ip"` 44 PublicIP string `json:"public_ip"` 45 // The username that should be used to connect to the VM. 46 RemoteUser string `json:"remote_user"` 47 // The VPC value defines an equivalency set for VMs that can route 48 // to one another via private IP addresses. 
We use this later on 49 // when determining whether or not cluster member should advertise 50 // their public or private IP. 51 VPC string `json:"vpc"` 52 MachineType string `json:"machine_type"` 53 Zone string `json:"zone"` 54 // Project represents the project to which this vm belongs, if the VM is in a 55 // cloud that supports project (i.e. GCE). Empty otherwise. 56 Project string `json:"project"` 57 } 58 59 // Name generates the name for the i'th node in a cluster. 60 func Name(cluster string, idx int) string { 61 return fmt.Sprintf("%s-%0.4d", cluster, idx) 62 } 63 64 // Error values for VM.Error 65 var ( 66 ErrBadNetwork = errors.New("could not determine network information") 67 ErrInvalidName = errors.New("invalid VM name") 68 ErrNoExpiration = errors.New("could not determine expiration") 69 ) 70 71 var regionRE = regexp.MustCompile(`(.*[^-])-?[a-z]$`) 72 73 // IsLocal returns true if the VM represents the local host. 74 func (vm *VM) IsLocal() bool { 75 return vm.Zone == config.Local 76 } 77 78 // Locality returns the cloud, region, and zone for the VM. We want to include the cloud, since 79 // GCE and AWS use similarly-named regions (e.g. us-east-1) 80 func (vm *VM) Locality() string { 81 var region string 82 if vm.IsLocal() { 83 region = vm.Zone 84 } else if match := regionRE.FindStringSubmatch(vm.Zone); len(match) == 2 { 85 region = match[1] 86 } else { 87 log.Fatalf("unable to parse region from zone %q", vm.Zone) 88 } 89 return fmt.Sprintf("cloud=%s,region=%s,zone=%s", vm.Provider, region, vm.Zone) 90 } 91 92 // List represents a list of VMs. 
93 type List []VM 94 95 func (vl List) Len() int { return len(vl) } 96 func (vl List) Swap(i, j int) { vl[i], vl[j] = vl[j], vl[i] } 97 func (vl List) Less(i, j int) bool { return vl[i].Name < vl[j].Name } 98 99 // Names sxtracts all VM.Name entries from the List 100 func (vl List) Names() []string { 101 ret := make([]string, len(vl)) 102 for i, vm := range vl { 103 ret[i] = vm.Name 104 } 105 return ret 106 } 107 108 // ProviderIDs extracts all ProviderID values from the List. 109 func (vl List) ProviderIDs() []string { 110 ret := make([]string, len(vl)) 111 for i, vm := range vl { 112 ret[i] = vm.ProviderID 113 } 114 return ret 115 } 116 117 // CreateOpts is the set of options when creating VMs. 118 type CreateOpts struct { 119 ClusterName string 120 Lifetime time.Duration 121 GeoDistributed bool 122 VMProviders []string 123 SSDOpts struct { 124 UseLocalSSD bool 125 // NoExt4Barrier, if set, makes the "-o nobarrier" flag be used when 126 // mounting the SSD. Ignored if UseLocalSSD is not set. 127 NoExt4Barrier bool 128 } 129 } 130 131 // MultipleProjectsOption is used to specify whether a command accepts multiple 132 // values for the --gce-project flag. 133 type MultipleProjectsOption bool 134 135 const ( 136 // SingleProject means that a single project is accepted. 137 SingleProject MultipleProjectsOption = false 138 // AcceptMultipleProjects means that multiple projects are supported. 139 AcceptMultipleProjects = true 140 ) 141 142 // ProviderFlags is a hook point for Providers to supply additional, 143 // provider-specific flags to various roachprod commands. In general, the flags 144 // should be prefixed with the provider's name to prevent collision between 145 // similar options. 146 // 147 // If a new command is added (perhaps `roachprod enlarge`) that needs 148 // additional provider- specific flags, add a similarly-named method 149 // `ConfigureEnlargeFlags` to mix in the additional flags. 
type ProviderFlags interface {
	// Configures a FlagSet with any options relevant to the `create` command.
	ConfigureCreateFlags(*pflag.FlagSet)
	// Configures a FlagSet with any options relevant to cluster manipulation
	// commands (`create`, `destroy`, `list`, `sync` and `gc`). The
	// MultipleProjectsOption argument tells the provider whether the command
	// accepts multiple values for the --gce-project flag.
	ConfigureClusterFlags(*pflag.FlagSet, MultipleProjectsOption)
}

// A Provider is a source of virtual machines running on some hosting platform.
type Provider interface {
	// CleanSSH and ConfigSSH take no arguments and report failure via error.
	// NOTE(review): presumably they remove and install provider-specific SSH
	// configuration respectively — confirm against the implementations.
	CleanSSH() error
	ConfigSSH() error
	// Create is given the names of the VMs together with the creation options.
	Create(names []string, opts CreateOpts) error
	// Delete is given the list of VMs to act on.
	Delete(vms List) error
	// Extend is given a list of VMs and a lifetime duration.
	// NOTE(review): presumably this updates the VMs' expiration — confirm
	// against the implementations.
	Extend(vms List, lifetime time.Duration) error
	// Return the account name associated with the provider
	FindActiveAccount() (string, error)
	// Returns a hook point for extending top-level roachprod tooling flags
	Flags() ProviderFlags
	// List returns a List of VMs, or an error.
	List() (List, error)
	// The name of the Provider, which will also surface in the top-level Providers map.
	Name() string

	// Active returns true if the provider is properly installed and capable of
	// operating, false if it's just a stub. This allows one to test whether a
	// particular provider is functioning properly by doing, for example,
	// Providers[gce.ProviderName].Active. Note that just looking at
	// Providers[gce.ProviderName] != nil doesn't work because
	// Providers[gce.ProviderName] can be a stub.
	Active() bool
}

// DeleteCluster is an optional capability for a Provider which can
// destroy an entire cluster in a single operation.
type DeleteCluster interface {
	// DeleteCluster is given the name of the cluster to destroy.
	DeleteCluster(name string) error
}

// Providers contains all known Provider instances. This is initialized by subpackage init() functions.
var Providers = map[string]Provider{}

// AllProviderNames returns the names of all known vm Providers. This is useful with the
// ProvidersSequential or ProvidersParallel methods.
193 func AllProviderNames() []string { 194 var ret []string 195 for name := range Providers { 196 ret = append(ret, name) 197 } 198 return ret 199 } 200 201 // FanOut collates a collection of VMs by their provider and invoke the callbacks in parallel. 202 func FanOut(list List, action func(Provider, List) error) error { 203 var m = map[string]List{} 204 for _, vm := range list { 205 m[vm.Provider] = append(m[vm.Provider], vm) 206 } 207 208 var g errgroup.Group 209 for name, vms := range m { 210 // capture loop variables 211 n := name 212 v := vms 213 g.Go(func() error { 214 p, ok := Providers[n] 215 if !ok { 216 return errors.Errorf("unknown provider name: %s", n) 217 } 218 return action(p, v) 219 }) 220 } 221 222 return g.Wait() 223 } 224 225 // Memoizes return value from FindActiveAccounts. 226 var cachedActiveAccounts map[string]string 227 228 // FindActiveAccounts queries the active providers for the name of the user 229 // account. 230 func FindActiveAccounts() (map[string]string, error) { 231 source := cachedActiveAccounts 232 233 if source == nil { 234 // Ask each Provider for its active account name. 235 source = map[string]string{} 236 err := ProvidersSequential(AllProviderNames(), func(p Provider) error { 237 account, err := p.FindActiveAccount() 238 if err != nil { 239 return err 240 } 241 if len(account) > 0 { 242 source[p.Name()] = account 243 } 244 return nil 245 }) 246 if err != nil { 247 return nil, err 248 } 249 cachedActiveAccounts = source 250 } 251 252 // Return a copy. 253 ret := make(map[string]string, len(source)) 254 for k, v := range source { 255 ret[k] = v 256 } 257 258 return ret, nil 259 } 260 261 // ForProvider resolves the Provider with the given name and executes the 262 // action. 
263 func ForProvider(named string, action func(Provider) error) error { 264 p, ok := Providers[named] 265 if !ok { 266 return errors.Errorf("unknown vm provider: %s", named) 267 } 268 if err := action(p); err != nil { 269 return errors.Wrapf(err, "in provider: %s", named) 270 } 271 return nil 272 } 273 274 // ProvidersParallel concurrently executes actions for each named Provider. 275 func ProvidersParallel(named []string, action func(Provider) error) error { 276 var g errgroup.Group 277 for _, name := range named { 278 // capture loop variable 279 n := name 280 g.Go(func() error { 281 return ForProvider(n, action) 282 }) 283 } 284 return g.Wait() 285 } 286 287 // ProvidersSequential sequentially executes actions for each named Provider. 288 func ProvidersSequential(named []string, action func(Provider) error) error { 289 for _, name := range named { 290 if err := ForProvider(name, action); err != nil { 291 return err 292 } 293 } 294 return nil 295 } 296 297 // ZonePlacement allocates zones to numNodes in an equally sized groups in the 298 // same order as zones. If numNodes is not divisible by len(zones) the remainder 299 // is allocated in a round-robin fashion and placed at the end of the returned 300 // slice. The returned slice has a length of numNodes where each value is in 301 // [0, numZones). 302 // 303 // For example: 304 // 305 // ZonePlacement(3, 8) = []int{0, 0, 1, 1, 2, 2, 0, 1} 306 // 307 func ZonePlacement(numZones, numNodes int) (nodeZones []int) { 308 numPerZone := numNodes / numZones 309 extraStartIndex := numPerZone * numZones 310 nodeZones = make([]int, numNodes) 311 for i := 0; i < numNodes; i++ { 312 nodeZones[i] = i / numPerZone 313 if i >= extraStartIndex { 314 nodeZones[i] = i % numZones 315 } 316 } 317 return nodeZones 318 } 319 320 // ExpandZonesFlag takes a slice of strings which may be of the format 321 // zone:N which implies that a given zone should be repeated N times and 322 // expands it. 
// For example ["us-west1-b:2", "us-east1-a:2"] will expand to
// ["us-west1-b", "us-west1-b", "us-east1-a", "us-east1-a"].
//
// Entries without a colon are passed through unchanged. If the text after the
// first colon is not a valid integer, or is negative, an error is returned
// together with a nil slice; no partial result is exposed.
func ExpandZonesFlag(zoneFlag []string) (zones []string, err error) {
	for _, zone := range zoneFlag {
		colonIdx := strings.Index(zone, ":")
		if colonIdx == -1 {
			zones = append(zones, zone)
			continue
		}
		name, count := zone[:colonIdx], zone[colonIdx+1:]
		// Use a distinct variable so the named result err is not shadowed.
		n, convErr := strconv.Atoi(count)
		if convErr != nil {
			// %w keeps the strconv error in the chain for errors.Is/As.
			return nil, fmt.Errorf("failed to parse %q: %w", zone, convErr)
		}
		if n < 0 {
			// A negative count would otherwise silently drop the zone.
			return nil, fmt.Errorf("failed to parse %q: negative repeat count %d", zone, n)
		}
		for i := 0; i < n; i++ {
			zones = append(zones, name)
		}
	}
	return zones, nil
}