github.com/openshift/installer@v1.4.17/pkg/destroy/gcp/gcp.go (about)

     1  package gcp
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net/http"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/pborman/uuid"
    11  	"github.com/pkg/errors"
    12  	"github.com/sirupsen/logrus"
    13  	resourcemanager "google.golang.org/api/cloudresourcemanager/v3"
    14  	"google.golang.org/api/compute/v1"
    15  	"google.golang.org/api/dns/v1"
    16  	"google.golang.org/api/file/v1"
    17  	"google.golang.org/api/googleapi"
    18  	"google.golang.org/api/iam/v1"
    19  	"google.golang.org/api/option"
    20  	"google.golang.org/api/storage/v1"
    21  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    22  	"k8s.io/apimachinery/pkg/util/wait"
    23  
    24  	gcpconfig "github.com/openshift/installer/pkg/asset/installconfig/gcp"
    25  	gcpconsts "github.com/openshift/installer/pkg/constants/gcp"
    26  	"github.com/openshift/installer/pkg/destroy/providers"
    27  	"github.com/openshift/installer/pkg/types"
    28  	gcptypes "github.com/openshift/installer/pkg/types/gcp"
    29  	"github.com/openshift/installer/pkg/version"
    30  )
    31  
// defaultTimeout and longTimeout are the time limits available for contexts
// created around GCP API calls. Run uses longTimeout to bound the machine
// type listing; defaultTimeout is not referenced in this part of the file and
// is presumably used by the per-resource destroy code elsewhere in the package.
var (
	defaultTimeout = 2 * time.Minute
	longTimeout    = 10 * time.Minute
)
    36  
// resourceScope identifies whether the resources being deleted are globally
// or regionally scoped; see gcpGlobalResource and gcpRegionalResource.
type resourceScope string

const (
	// capgProviderOwnedLabelFmt is the format string for the label
	// used for resources created by the Cluster API GCP provider.
	capgProviderOwnedLabelFmt = "capg-cluster-%s"

	// gcpGlobalResource is an identifier to indicate that the resource(s)
	// that are being deleted are globally scoped.
	gcpGlobalResource resourceScope = "global"

	// gcpRegionalResource is an identifier to indicate that the resource(s)
	// that are being deleted are regionally scoped.
	gcpRegionalResource resourceScope = "regional"
)
    52  
// ClusterUninstaller holds the various options for the cluster we want to delete,
// together with the API service clients and bookkeeping state used while deleting it.
type ClusterUninstaller struct {
	// Logger is supplied by the caller of New. The remaining exported fields
	// are copied by New from the cluster metadata; ClusterID is the
	// metadata's InfraID.
	Logger            logrus.FieldLogger
	Region            string
	ProjectID         string
	NetworkProjectID  string
	PrivateZoneDomain string
	ClusterID         string

	// Google API service clients. New leaves them nil; Run creates them.
	computeSvc *compute.Service
	iamSvc     *iam.Service
	dnsSvc     *dns.Service
	storageSvc *storage.Service
	rmSvc      *resourcemanager.Service
	fileSvc    *file.Service

	// cpusByMachineType caches the number of CPUs per machine type, used in quota
	// calculations on deletion
	cpusByMachineType map[string]int64

	// cloudControllerUID is the cluster ID used by the cluster's cloud controller
	// to generate load balancer related resources. It can be obtained either
	// from metadata or by inferring it from existing cluster resources.
	cloudControllerUID string

	// Embedded helpers. New initializes requestIDTracker and pendingItemTracker;
	// errorTracker is declared elsewhere in the package and New leaves it at
	// its zero value.
	errorTracker
	requestIDTracker
	pendingItemTracker
}
    82  
    83  // New returns a GCP destroyer from ClusterMetadata.
    84  func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.Destroyer, error) {
    85  	return &ClusterUninstaller{
    86  		Logger:             logger,
    87  		Region:             metadata.ClusterPlatformMetadata.GCP.Region,
    88  		ProjectID:          metadata.ClusterPlatformMetadata.GCP.ProjectID,
    89  		NetworkProjectID:   metadata.ClusterPlatformMetadata.GCP.NetworkProjectID,
    90  		PrivateZoneDomain:  metadata.ClusterPlatformMetadata.GCP.PrivateZoneDomain,
    91  		ClusterID:          metadata.InfraID,
    92  		cloudControllerUID: gcptypes.CloudControllerUID(metadata.InfraID),
    93  		requestIDTracker:   newRequestIDTracker(),
    94  		pendingItemTracker: newPendingItemTracker(),
    95  	}, nil
    96  }
    97  
// Run is the entrypoint to start the uninstall process.
//
// It obtains a GCP session, creates the compute, IAM, DNS, storage, resource
// manager and filestore service clients, caches the CPU count of the machine
// types listed for ProjectID, and then polls destroyCluster every 10 seconds
// until a pass reports completion. On success it returns the quota usage
// accumulated in pendingItemTracker.removedQuota; on any failure it returns a
// nil quota and a wrapped error.
func (o *ClusterUninstaller) Run() (*types.ClusterQuota, error) {
	ctx := context.Background()
	ssn, err := gcpconfig.GetSession(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get session")
	}

	// Every service client below shares the session credentials and user agent.
	options := []option.ClientOption{
		option.WithCredentials(ssn.Credentials),
		option.WithUserAgent(fmt.Sprintf("OpenShift/4.x Destroyer/%s", version.Raw)),
	}

	o.computeSvc, err = compute.NewService(ctx, options...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create compute service")
	}

	// Bound the machine type listing with longTimeout. The deferred cancel
	// only fires when Run returns.
	cctx, cancel := context.WithTimeout(ctx, longTimeout)
	defer cancel()

	// Populate the machine type name -> guest CPU count cache, requesting only
	// the fields that are needed and walking every page of results.
	o.cpusByMachineType = map[string]int64{}
	req := o.computeSvc.MachineTypes.AggregatedList(o.ProjectID).Fields("items/*/machineTypes(name,guestCpus),nextPageToken")
	if err := req.Pages(cctx, func(list *compute.MachineTypeAggregatedList) error {
		for _, scopedList := range list.Items {
			for _, item := range scopedList.MachineTypes {
				o.cpusByMachineType[item.Name] = item.GuestCpus
			}
		}
		return nil
	}); err != nil {
		return nil, errors.Wrap(err, "failed to cache machine types")
	}

	o.iamSvc, err = iam.NewService(ctx, options...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create iam service")
	}

	o.dnsSvc, err = dns.NewService(ctx, options...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create dns service")
	}

	o.storageSvc, err = storage.NewService(ctx, options...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create storage service")
	}

	o.rmSvc, err = resourcemanager.NewService(ctx, options...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create resourcemanager service")
	}

	o.fileSvc, err = file.NewService(ctx, options...)
	if err != nil {
		return nil, fmt.Errorf("failed to create filestore service: %w", err)
	}

	// Repeat destroyCluster passes with no overall deadline. destroyCluster
	// always returns a nil error, so the loop ends only when a pass reports
	// that every step succeeded.
	err = wait.PollImmediateInfinite(
		time.Second*10,
		o.destroyCluster,
	)
	if err != nil {
		return nil, errors.Wrap(err, "failed to destroy cluster")
	}

	// Report the quota usage recorded for the items removed from the pending lists.
	quota := gcptypes.Quota(o.pendingItemTracker.removedQuota)
	return &types.ClusterQuota{GCP: &quota}, nil
}
   168  
   169  func (o *ClusterUninstaller) destroyCluster() (bool, error) {
   170  	stagedFuncs := [][]struct {
   171  		name    string
   172  		execute func(ctx context.Context) error
   173  	}{{
   174  		{name: "Stop instances", execute: o.stopInstances},
   175  	}, {
   176  		{name: "Cloud controller resources", execute: o.discoverCloudControllerResources},
   177  	}, {
   178  		{name: "Instances", execute: o.destroyInstances},
   179  		{name: "Disks", execute: o.destroyDisks},
   180  		{name: "Service accounts", execute: o.destroyServiceAccounts},
   181  		{name: "Images", execute: o.destroyImages},
   182  		{name: "DNS", execute: o.destroyDNS},
   183  		{name: "Buckets", execute: o.destroyBuckets},
   184  		{name: "Routes", execute: o.destroyRoutes},
   185  		{name: "Firewalls", execute: o.destroyFirewalls},
   186  		{name: "Addresses", execute: o.destroyAddresses},
   187  		{name: "Forwarding rules", execute: o.destroyForwardingRules},
   188  		{name: "Target Pools", execute: o.destroyTargetPools},
   189  		{name: "Instance groups", execute: o.destroyInstanceGroups},
   190  		{name: "Target TCP Proxies", execute: o.destroyTargetTCPProxies},
   191  		{name: "Backend services", execute: o.destroyBackendServices},
   192  		{name: "Health checks", execute: o.destroyHealthChecks},
   193  		{name: "HTTP Health checks", execute: o.destroyHTTPHealthChecks},
   194  		{name: "Routers", execute: o.destroyRouters},
   195  		{name: "Subnetworks", execute: o.destroySubnetworks},
   196  		{name: "Networks", execute: o.destroyNetworks},
   197  		{name: "Filestores", execute: o.destroyFilestores},
   198  	}}
   199  
   200  	// create the main Context, so all stages can accept and make context children
   201  	ctx := context.Background()
   202  
   203  	done := true
   204  	for _, stage := range stagedFuncs {
   205  		if done {
   206  			for _, f := range stage {
   207  				err := f.execute(ctx)
   208  				if err != nil {
   209  					o.Logger.Debugf("%s: %v", f.name, err)
   210  					done = false
   211  				}
   212  			}
   213  		}
   214  	}
   215  	return done, nil
   216  }
   217  
// getZoneName extracts a zone name from a zone URL by delegating to
// getNameFromURL with the "zones" path segment, ex:
// https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a -> us-central1-a
// It returns the empty string when zoneURL contains no "zones/" segment.
func (o *ClusterUninstaller) getZoneName(zoneURL string) string {
	return getNameFromURL("zones", zoneURL)
}
   222  
   223  // getNameFromURL gets the item name from the full URL, ex:
   224  // https://www.googleapis.com/compute/v1/projects/project-id/zones/us-central1-a -> us-central1-a
   225  // https://www.googleapis.com/compute/v1/projects/project-id/global/networks/something-network -> something-network
   226  func getNameFromURL(item, url string) string {
   227  	items := strings.Split(url, item+"/")
   228  	if len(items) < 2 {
   229  		return ""
   230  	}
   231  	return items[len(items)-1]
   232  }
   233  
   234  // getRegionFromZone extracts a region name from a zone name of the form: us-central1-a
   235  // Splitting the name with the last delimiter `-`, leaves a string like: us-central1
   236  func getRegionFromZone(zoneName string) string {
   237  	return zoneName[:strings.LastIndex(zoneName, "-")]
   238  }
   239  
   240  // getDiskLimit determines the name of the quota Limit that applies to the disk type, ex:
   241  // projects/project/zones/zone/diskTypes/pd-standard -> "ssd_total_storage"
   242  func getDiskLimit(typeURL string) string {
   243  	switch getNameFromURL("diskTypes", typeURL) {
   244  	case "pd-balanced", "pd-ssd", "hyperdisk-balanced":
   245  		return "ssd_total_storage"
   246  	case "pd-standard":
   247  		return "disks_total_storage"
   248  	default:
   249  		return "unknown"
   250  	}
   251  }
   252  
// isClusterResource reports whether name starts with the cluster ID followed
// by a dash, i.e. "<ClusterID>-".
func (o *ClusterUninstaller) isClusterResource(name string) bool {
	return strings.HasPrefix(name, o.ClusterID+"-")
}
   256  
// clusterIDFilter returns a filter expression on the resource name of the
// form: name : "<ClusterID>-*"
func (o *ClusterUninstaller) clusterIDFilter() string {
	return fmt.Sprintf("name : \"%s-*\"", o.ClusterID)
}
   260  
   261  func (o *ClusterUninstaller) clusterLabelFilter() string {
   262  	return fmt.Sprintf("(labels.%s = \"owned\") OR (labels.%s = \"owned\")",
   263  		fmt.Sprintf(gcpconsts.ClusterIDLabelFmt, o.ClusterID), fmt.Sprintf(capgProviderOwnedLabelFmt, o.ClusterID))
   264  }
   265  
// clusterLabelOrClusterIDFilter returns a filter expression that matches a
// resource satisfying either clusterIDFilter or clusterLabelFilter, with each
// sub-expression wrapped in parentheses.
func (o *ClusterUninstaller) clusterLabelOrClusterIDFilter() string {
	return fmt.Sprintf("(%s) OR (%s)", o.clusterIDFilter(), o.clusterLabelFilter())
}
   269  
   270  func isForbidden(err error) bool {
   271  	if err == nil {
   272  		return false
   273  	}
   274  	var ae *googleapi.Error
   275  	if errors.As(err, &ae) {
   276  		return ae.Code == http.StatusForbidden
   277  	}
   278  
   279  	return false
   280  }
   281  
   282  func isNoOp(err error) bool {
   283  	if err == nil {
   284  		return false
   285  	}
   286  	ae, ok := err.(*googleapi.Error)
   287  	return ok && (ae.Code == http.StatusNotFound || ae.Code == http.StatusNotModified)
   288  }
   289  
   290  // aggregateError is a utility function that takes a slice of errors and an
   291  // optional pending argument, and returns an error or nil
   292  func aggregateError(errs []error, pending ...int) error {
   293  	err := utilerrors.NewAggregate(errs)
   294  	if err != nil {
   295  		return err
   296  	}
   297  	if len(pending) > 0 && pending[0] > 0 {
   298  		return errors.Errorf("%d items pending", pending[0])
   299  	}
   300  	return nil
   301  }
   302  
   303  // requestIDTracker keeps track of a set of request IDs mapped to a unique resource
   304  // identifier
   305  type requestIDTracker struct {
   306  	requestIDs map[string]string
   307  }
   308  
   309  func newRequestIDTracker() requestIDTracker {
   310  	return requestIDTracker{
   311  		requestIDs: map[string]string{},
   312  	}
   313  }
   314  
   315  // requestID returns a UID for a given item identifier. Unless the ID is reset, the
   316  // same requestID will be returned every time for a given item.
   317  func (t requestIDTracker) requestID(identifier ...string) string {
   318  	key := strings.Join(identifier, "/")
   319  	id, exists := t.requestIDs[key]
   320  	if !exists {
   321  		id = uuid.New()
   322  		t.requestIDs[key] = id
   323  	}
   324  	return id
   325  }
   326  
   327  // resetRequestID resets the request ID used for a particular item. This
   328  // should be called whenever a request fails, and a brand new request should be
   329  // sent.
   330  func (t requestIDTracker) resetRequestID(identifier ...string) {
   331  	key := strings.Join(identifier, "/")
   332  	delete(t.requestIDs, key)
   333  }
   334  
   335  // pendingItemTracker tracks a set of pending item names for a given type of resource
   336  type pendingItemTracker struct {
   337  	pendingItems map[string]cloudResources
   338  	removedQuota []gcptypes.QuotaUsage
   339  }
   340  
   341  func newPendingItemTracker() pendingItemTracker {
   342  	return pendingItemTracker{
   343  		pendingItems: map[string]cloudResources{},
   344  	}
   345  }
   346  
   347  // GetAllPendintItems returns a slice of all of the pending items across all types.
   348  func (t *pendingItemTracker) GetAllPendingItems() []cloudResource {
   349  	var items []cloudResource
   350  	for _, is := range t.pendingItems {
   351  		for _, i := range is {
   352  			items = append(items, i)
   353  		}
   354  	}
   355  	return items
   356  }
   357  
   358  // getPendingItems returns the list of resources to be deleted.
   359  func (t *pendingItemTracker) getPendingItems(itemType string) []cloudResource {
   360  	lastFound, exists := t.pendingItems[itemType]
   361  	if !exists {
   362  		lastFound = cloudResources{}
   363  	}
   364  	return lastFound.list()
   365  }
   366  
   367  // insertPendingItems adds to the list of resources to be deleted.
   368  func (t *pendingItemTracker) insertPendingItems(itemType string, items []cloudResource) []cloudResource {
   369  	lastFound, exists := t.pendingItems[itemType]
   370  	if !exists {
   371  		lastFound = cloudResources{}
   372  	}
   373  	lastFound = lastFound.insert(items...)
   374  	t.pendingItems[itemType] = lastFound
   375  	return lastFound.list()
   376  }
   377  
   378  // deletePendingItems removes from the list of resources to be deleted.
   379  func (t *pendingItemTracker) deletePendingItems(itemType string, items []cloudResource) []cloudResource {
   380  	lastFound, exists := t.pendingItems[itemType]
   381  	if !exists {
   382  		lastFound = cloudResources{}
   383  	}
   384  	for _, item := range items {
   385  		t.removedQuota = mergeAllUsage(t.removedQuota, item.quota)
   386  	}
   387  	lastFound = lastFound.delete(items...)
   388  	t.pendingItems[itemType] = lastFound
   389  	return lastFound.list()
   390  }
   391  
   392  func isErrorStatus(code int64) bool {
   393  	return code != 0 && (code < 200 || code >= 300)
   394  }
   395  
   396  func operationErrorMessage(op *compute.Operation) string {
   397  	errs := []string{}
   398  	if op.Error != nil {
   399  		for _, e := range op.Error.Errors {
   400  			errs = append(errs, fmt.Sprintf("%s: %s", e.Code, e.Message))
   401  		}
   402  	}
   403  	if len(errs) == 0 {
   404  		return op.HttpErrorMessage
   405  	}
   406  	return strings.Join(errs, ", ")
   407  }