github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachprod/cloud/cluster_cloud.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cloud
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"sort"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/config"
    21  	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/vm"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
// vmNameFormat is a human-readable description of the naming scheme expected
// for non-local VMs. It is interpolated into the error that namesFromVM
// returns when a VM name has too few dash-separated parts.
const vmNameFormat = "user-<clusterid>-<nodeid>"
    26  
// Cloud is the aggregate view that ListCloud assembles from the VMs reported
// by the registered vm.Providers: the well-formed VMs grouped into clusters,
// plus the VMs that had problems.
type Cloud struct {
	// Clusters maps a cluster name to the cluster built from the VMs that
	// share that name.
	Clusters map[string]*Cluster `json:"clusters"`
	// Any VM in this list can be expected to have at least one element
	// in its Errors field.
	BadInstances vm.List `json:"bad_instances"`
}
    34  
    35  // Clone creates a deep copy of the receiver.
    36  func (c *Cloud) Clone() *Cloud {
    37  	cc := *c
    38  	cc.Clusters = make(map[string]*Cluster, len(c.Clusters))
    39  	for k, v := range c.Clusters {
    40  		cc.Clusters[k] = v
    41  	}
    42  	return &cc
    43  }
    44  
    45  // BadInstanceErrors TODO(peter): document
    46  func (c *Cloud) BadInstanceErrors() map[error]vm.List {
    47  	ret := map[error]vm.List{}
    48  
    49  	// Expand instances and errors
    50  	for _, vm := range c.BadInstances {
    51  		for _, err := range vm.Errors {
    52  			ret[err] = append(ret[err], vm)
    53  		}
    54  	}
    55  
    56  	// Sort each List to make the output prettier
    57  	for _, v := range ret {
    58  		sort.Sort(v)
    59  	}
    60  
    61  	return ret
    62  }
    63  
    64  func newCloud() *Cloud {
    65  	return &Cloud{
    66  		Clusters: make(map[string]*Cluster),
    67  	}
    68  }
    69  
// A Cluster is created by querying various vm.Provider instances.
//
// TODO(benesch): unify with syncedCluster.
type Cluster struct {
	// Name is the cluster name. ListCloud derives it from the VM name by
	// dropping the trailing "-<nodeid>" part; local VMs use config.Local.
	Name string `json:"name"`
	// User is the first dash-separated part of the VM name (config.Local for
	// local VMs), taken from the first VM seen for the cluster.
	User string `json:"user"`
	// This is the earliest creation and shortest lifetime across VMs.
	CreatedAt time.Time     `json:"created_at"`
	Lifetime  time.Duration `json:"lifetime"`
	// VMs holds the cluster's member VMs; ListCloud sorts this list.
	VMs       vm.List       `json:"vms"`
}
    81  
    82  // Clouds returns the names of all of the various cloud providers used
    83  // by the VMs in the cluster.
    84  func (c *Cluster) Clouds() []string {
    85  	present := make(map[string]bool)
    86  	for _, m := range c.VMs {
    87  		present[m.Provider] = true
    88  	}
    89  
    90  	var ret []string
    91  	for provider := range present {
    92  		ret = append(ret, provider)
    93  	}
    94  	sort.Strings(ret)
    95  	return ret
    96  }
    97  
    98  // ExpiresAt TODO(peter): document
    99  func (c *Cluster) ExpiresAt() time.Time {
   100  	return c.CreatedAt.Add(c.Lifetime)
   101  }
   102  
   103  // GCAt TODO(peter): document
   104  func (c *Cluster) GCAt() time.Time {
   105  	// NB: GC is performed every hour. We calculate the lifetime of the cluster
   106  	// taking the GC time into account to accurately reflect when the cluster
   107  	// will be destroyed.
   108  	return c.ExpiresAt().Add(time.Hour - 1).Truncate(time.Hour)
   109  }
   110  
   111  // LifetimeRemaining TODO(peter): document
   112  func (c *Cluster) LifetimeRemaining() time.Duration {
   113  	return time.Until(c.GCAt())
   114  }
   115  
   116  func (c *Cluster) String() string {
   117  	var buf bytes.Buffer
   118  	fmt.Fprintf(&buf, "%s: %d", c.Name, len(c.VMs))
   119  	if !c.IsLocal() {
   120  		fmt.Fprintf(&buf, " (%s)", c.LifetimeRemaining().Round(time.Second))
   121  	}
   122  	return buf.String()
   123  }
   124  
   125  // PrintDetails TODO(peter): document
   126  func (c *Cluster) PrintDetails() {
   127  	fmt.Printf("%s: %s ", c.Name, c.Clouds())
   128  	if !c.IsLocal() {
   129  		l := c.LifetimeRemaining().Round(time.Second)
   130  		if l <= 0 {
   131  			fmt.Printf("expired %s ago\n", -l)
   132  		} else {
   133  			fmt.Printf("%s remaining\n", l)
   134  		}
   135  	} else {
   136  		fmt.Printf("(no expiration)\n")
   137  	}
   138  	for _, vm := range c.VMs {
   139  		fmt.Printf("  %s\t%s\t%s\t%s\n", vm.Name, vm.DNS, vm.PrivateIP, vm.PublicIP)
   140  	}
   141  }
   142  
   143  // IsLocal TODO(peter): document
   144  func (c *Cluster) IsLocal() bool {
   145  	return c.Name == config.Local
   146  }
   147  
   148  func namesFromVM(v vm.VM) (string, string, error) {
   149  	if v.IsLocal() {
   150  		return config.Local, config.Local, nil
   151  	}
   152  	name := v.Name
   153  	parts := strings.Split(name, "-")
   154  	if len(parts) < 3 {
   155  		return "", "", fmt.Errorf("expected VM name in the form %s, got %s", vmNameFormat, name)
   156  	}
   157  	return parts[0], strings.Join(parts[:len(parts)-1], "-"), nil
   158  }
   159  
   160  // ListCloud TODO(peter): document
   161  func ListCloud() (*Cloud, error) {
   162  	cloud := newCloud()
   163  
   164  	for _, p := range vm.Providers {
   165  		vms, err := p.List()
   166  		if err != nil {
   167  			return nil, err
   168  		}
   169  
   170  		for _, v := range vms {
   171  			// Parse cluster/user from VM name, but only for non-local VMs
   172  			userName, clusterName, err := namesFromVM(v)
   173  			if err != nil {
   174  				v.Errors = append(v.Errors, vm.ErrInvalidName)
   175  			}
   176  
   177  			// Anything with an error gets tossed into the BadInstances slice, and we'll correct
   178  			// the problem later on.
   179  			if len(v.Errors) > 0 {
   180  				cloud.BadInstances = append(cloud.BadInstances, v)
   181  				continue
   182  			}
   183  
   184  			if _, ok := cloud.Clusters[clusterName]; !ok {
   185  				cloud.Clusters[clusterName] = &Cluster{
   186  					Name:      clusterName,
   187  					User:      userName,
   188  					CreatedAt: v.CreatedAt,
   189  					Lifetime:  v.Lifetime,
   190  					VMs:       nil,
   191  				}
   192  			}
   193  
   194  			// Bound the cluster creation time and overall lifetime to the earliest and/or shortest VM
   195  			c := cloud.Clusters[clusterName]
   196  			c.VMs = append(c.VMs, v)
   197  			if v.CreatedAt.Before(c.CreatedAt) {
   198  				c.CreatedAt = v.CreatedAt
   199  			}
   200  			if v.Lifetime < c.Lifetime {
   201  				c.Lifetime = v.Lifetime
   202  			}
   203  		}
   204  	}
   205  
   206  	// Sort VMs for each cluster. We want to make sure we always have the same order.
   207  	for _, c := range cloud.Clusters {
   208  		sort.Sort(c.VMs)
   209  	}
   210  
   211  	return cloud, nil
   212  }
   213  
   214  // CreateCluster TODO(peter): document
   215  func CreateCluster(nodes int, opts vm.CreateOpts) error {
   216  	providerCount := len(opts.VMProviders)
   217  	if providerCount == 0 {
   218  		return errors.New("no VMProviders configured")
   219  	}
   220  
   221  	// Allocate vm names over the configured providers
   222  	vmLocations := map[string][]string{}
   223  	for i, p := 1, 0; i <= nodes; i++ {
   224  		pName := opts.VMProviders[p]
   225  		vmName := vm.Name(opts.ClusterName, i)
   226  		vmLocations[pName] = append(vmLocations[pName], vmName)
   227  
   228  		p = (p + 1) % providerCount
   229  	}
   230  
   231  	return vm.ProvidersParallel(opts.VMProviders, func(p vm.Provider) error {
   232  		return p.Create(vmLocations[p.Name()], opts)
   233  	})
   234  }
   235  
   236  // DestroyCluster TODO(peter): document
   237  func DestroyCluster(c *Cluster) error {
   238  	return vm.FanOut(c.VMs, func(p vm.Provider, vms vm.List) error {
   239  		// Enable a fast-path for providers that can destroy a cluster in one shot.
   240  		if x, ok := p.(vm.DeleteCluster); ok {
   241  			return x.DeleteCluster(c.Name)
   242  		}
   243  		return p.Delete(vms)
   244  	})
   245  }
   246  
   247  // ExtendCluster TODO(peter): document
   248  func ExtendCluster(c *Cluster, extension time.Duration) error {
   249  	newLifetime := c.Lifetime + extension
   250  
   251  	return vm.FanOut(c.VMs, func(p vm.Provider, vms vm.List) error {
   252  		return p.Extend(vms, newLifetime)
   253  	})
   254  }