github.com/openshift/installer@v1.4.17/pkg/destroy/azure/azure.go (about)

     1  package azure
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net/http"
     8  	"sort"
     9  	"strings"
    10  	"time"
    11  
    12  	azurestackdns "github.com/Azure/azure-sdk-for-go/profiles/2018-03-01/dns/mgmt/dns"
    13  	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
    14  	"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
    15  	azcoreto "github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
    16  	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
    17  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph"
    18  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
    19  	"github.com/Azure/azure-sdk-for-go/services/preview/dns/mgmt/2018-03-01-preview/dns"
    20  	"github.com/Azure/azure-sdk-for-go/services/privatedns/mgmt/2018-09-01/privatedns"
    21  	"github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2018-05-01/resources"
    22  	"github.com/Azure/go-autorest/autorest"
    23  	azureenv "github.com/Azure/go-autorest/autorest/azure"
    24  	"github.com/Azure/go-autorest/autorest/to"
    25  	msgraphsdk "github.com/microsoftgraph/msgraph-sdk-go"
    26  	"github.com/microsoftgraph/msgraph-sdk-go/applications"
    27  	"github.com/microsoftgraph/msgraph-sdk-go/models"
    28  	"github.com/microsoftgraph/msgraph-sdk-go/models/odataerrors"
    29  	"github.com/microsoftgraph/msgraph-sdk-go/serviceprincipals"
    30  	"github.com/sirupsen/logrus"
    31  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    32  	"k8s.io/apimachinery/pkg/util/sets"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  
    35  	azuresession "github.com/openshift/installer/pkg/asset/installconfig/azure"
    36  	"github.com/openshift/installer/pkg/destroy/providers"
    37  	"github.com/openshift/installer/pkg/types"
    38  	"github.com/openshift/installer/pkg/types/azure"
    39  )
    40  
// ClusterUninstaller holds the various options for the cluster we want to delete.
type ClusterUninstaller struct {
	// CloudName is compared against azure.StackCloud by Run to select the
	// Azure Stack code paths. Session supplies the credentials, authorizers
	// and endpoints that configureClients uses to build the API clients.
	CloudName azure.CloudEnvironment
	Session   *azuresession.Session

	// InfraID is used to build the "kubernetes.io_cluster.<InfraID>" tag
	// names and filters. Any resource group name left empty is filled in by
	// Run from the tags of the cluster's resource group when available;
	// ResourceGroupName additionally falls back to "<InfraID>-rg".
	InfraID                     string
	ResourceGroupName           string
	BaseDomainResourceGroupName string
	NetworkResourceGroupName    string

	// Logger receives the progress and debug output of the uninstall.
	Logger logrus.FieldLogger

	// API clients; all of them are populated by configureClients.
	resourceGroupsClient    resources.GroupsClient
	zonesClient             dns.ZonesClient
	recordsClient           dns.RecordSetsClient
	privateRecordSetsClient privatedns.RecordSetsClient
	privateZonesClient      privatedns.PrivateZonesClient
	msgraphClient           *msgraphsdk.GraphServiceClient
	resourceGraphClient     *armresourcegraph.Client
	tagsClient              *armresources.TagsClient
}
    62  
    63  func (o *ClusterUninstaller) configureClients() error {
    64  	subscriptionID := o.Session.Credentials.SubscriptionID
    65  	endpoint := o.Session.Environment.ResourceManagerEndpoint
    66  
    67  	o.resourceGroupsClient = resources.NewGroupsClientWithBaseURI(endpoint, subscriptionID)
    68  	o.resourceGroupsClient.Authorizer = o.Session.Authorizer
    69  
    70  	o.zonesClient = dns.NewZonesClientWithBaseURI(endpoint, subscriptionID)
    71  	o.zonesClient.Authorizer = o.Session.Authorizer
    72  
    73  	o.recordsClient = dns.NewRecordSetsClientWithBaseURI(endpoint, subscriptionID)
    74  	o.recordsClient.Authorizer = o.Session.Authorizer
    75  
    76  	o.privateZonesClient = privatedns.NewPrivateZonesClientWithBaseURI(endpoint, subscriptionID)
    77  	o.privateZonesClient.Authorizer = o.Session.Authorizer
    78  
    79  	o.privateRecordSetsClient = privatedns.NewRecordSetsClientWithBaseURI(endpoint, subscriptionID)
    80  	o.privateRecordSetsClient.Authorizer = o.Session.Authorizer
    81  
    82  	adapter, err := msgraphsdk.NewGraphRequestAdapter(o.Session.AuthProvider)
    83  	if err != nil {
    84  		return err
    85  	}
    86  	// This can be empty for StackCloud
    87  	if o.Session.Environment.MicrosoftGraphEndpoint != "" {
    88  		// Set the service root to the Microsoft Graph for the appropriate
    89  		// cloud endpoint (e.g, GovCloud). Failing to do so results in an
    90  		// unhelpful `context deadline exceeded` error.
    91  		// NOTE: The API version must be included in the URL
    92  		// See https://issues.redhat.com/browse/OCPBUGS-4549
    93  		// See https://learn.microsoft.com/en-us/graph/sdks/national-clouds?tabs=go
    94  		adapter.SetBaseUrl(fmt.Sprintf("%s/v1.0", o.Session.Environment.MicrosoftGraphEndpoint))
    95  	}
    96  	o.msgraphClient = msgraphsdk.NewGraphServiceClient(adapter)
    97  
    98  	clientOpts := &arm.ClientOptions{
    99  		ClientOptions: azcore.ClientOptions{
   100  			Cloud: o.Session.CloudConfig,
   101  		},
   102  	}
   103  
   104  	rgClient, err := armresourcegraph.NewClient(o.Session.TokenCreds, clientOpts)
   105  	if err != nil {
   106  		return err
   107  	}
   108  	o.resourceGraphClient = rgClient
   109  
   110  	tagsClient, err := armresources.NewTagsClient(o.Session.Credentials.SubscriptionID, o.Session.TokenCreds, clientOpts)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	o.tagsClient = tagsClient
   115  
   116  	return nil
   117  }
   118  
   119  // New returns an Azure destroyer from ClusterMetadata.
   120  func New(logger logrus.FieldLogger, metadata *types.ClusterMetadata) (providers.Destroyer, error) {
   121  	cloudName := metadata.Azure.CloudName
   122  	if cloudName == "" {
   123  		cloudName = azure.PublicCloud
   124  	}
   125  	session, err := azuresession.GetSession(cloudName, metadata.Azure.ARMEndpoint)
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	return &ClusterUninstaller{
   131  		Session:                     session,
   132  		InfraID:                     metadata.InfraID,
   133  		ResourceGroupName:           metadata.Azure.ResourceGroupName,
   134  		Logger:                      logger,
   135  		BaseDomainResourceGroupName: metadata.Azure.BaseDomainResourceGroupName,
   136  		CloudName:                   cloudName,
   137  	}, nil
   138  }
   139  
   140  // Run is the entrypoint to start the uninstall process.
   141  func (o *ClusterUninstaller) Run() (*types.ClusterQuota, error) {
   142  	var errs []error
   143  	var err error
   144  
   145  	err = o.configureClients()
   146  	if err != nil {
   147  		return nil, err
   148  	}
   149  
   150  	// 2 hours
   151  	timeout := 120 * time.Minute
   152  	waitCtx, cancel := context.WithTimeout(context.Background(), timeout)
   153  	defer cancel()
   154  
   155  	// Retrieve metadata from resource group tags, if available
   156  	filter := fmt.Sprintf("tagName eq 'kubernetes.io_cluster.%s' and tagValue eq 'owned'", o.InfraID)
   157  	groupPager, err := o.resourceGroupsClient.ListComplete(waitCtx, filter, to.Int32Ptr(1))
   158  	if err != nil {
   159  		return nil, fmt.Errorf("could not list resource groups: %w", err)
   160  	}
   161  
   162  	for ; groupPager.NotDone(); err = groupPager.NextWithContext(waitCtx) {
   163  		if err != nil {
   164  			o.Logger.Debugf("failed to advance to next resource group list page: %v", err)
   165  			continue
   166  		}
   167  		group := groupPager.Value()
   168  		if len(o.ResourceGroupName) == 0 {
   169  			o.ResourceGroupName = to.String(group.Name)
   170  			o.Logger.Debugf("found resource group name=%s from tags", o.ResourceGroupName)
   171  		}
   172  		if len(o.BaseDomainResourceGroupName) == 0 {
   173  			o.BaseDomainResourceGroupName = to.String(group.Tags[azure.TagMetadataBaseDomainRG])
   174  			o.Logger.Debugf("found base domain resource group name=%s from tags", o.BaseDomainResourceGroupName)
   175  		}
   176  		if len(o.NetworkResourceGroupName) == 0 {
   177  			o.NetworkResourceGroupName = to.String(group.Tags[azure.TagMetadataNetworkRG])
   178  			o.Logger.Debugf("found network resource group name=%s from tags", o.NetworkResourceGroupName)
   179  		}
   180  	}
   181  
   182  	if len(o.ResourceGroupName) == 0 {
   183  		o.ResourceGroupName = o.InfraID + "-rg"
   184  		o.Logger.Debugf("using default resource group name=%s", o.ResourceGroupName)
   185  	}
   186  
   187  	err = wait.PollUntilContextCancel(
   188  		waitCtx,
   189  		1*time.Second,
   190  		false,
   191  		func(ctx context.Context) (bool, error) {
   192  			o.Logger.Debugf("deleting public records")
   193  			if o.CloudName == azure.StackCloud {
   194  				err = deleteAzureStackPublicRecords(ctx, o)
   195  			} else {
   196  				err = deletePublicRecords(ctx, o.zonesClient, o.recordsClient, o.privateZonesClient, o.privateRecordSetsClient, o.Logger, o.ResourceGroupName)
   197  			}
   198  			if err != nil {
   199  				o.Logger.Debug(err)
   200  				if isAuthError(err) {
   201  					errs = append(errs, fmt.Errorf("unable to authenticate when deleting public DNS records: %w", err))
   202  					return true, err
   203  				}
   204  				return false, nil
   205  			}
   206  			return true, nil
   207  		},
   208  	)
   209  	if err != nil {
   210  		errs = append(errs, fmt.Errorf("failed to delete public DNS records: %w", err))
   211  		o.Logger.Debug(err)
   212  	}
   213  
   214  	err = wait.PollUntilContextCancel(
   215  		waitCtx,
   216  		1*time.Second,
   217  		false,
   218  		func(ctx context.Context) (bool, error) {
   219  			o.Logger.Debugf("deleting resource group")
   220  			err = deleteResourceGroup(ctx, o.resourceGroupsClient, o.Logger, o.ResourceGroupName)
   221  			if err != nil {
   222  				o.Logger.Debug(err)
   223  				if isAuthError(err) {
   224  					errs = append(errs, fmt.Errorf("unable to authenticate when deleting resource group: %w", err))
   225  					return true, err
   226  				} else if isResourceGroupBlockedError(err) {
   227  					errs = append(errs, fmt.Errorf("unable to delete resource group, resources in the group are in use by others: %w", err))
   228  					return true, err
   229  				}
   230  				return false, nil
   231  			}
   232  			return true, nil
   233  		},
   234  	)
   235  	if err != nil {
   236  		errs = append(errs, fmt.Errorf("failed to delete resource group: %w", err))
   237  		o.Logger.Debug(err)
   238  	}
   239  
   240  	err = wait.PollUntilContextCancel(
   241  		waitCtx,
   242  		1*time.Second,
   243  		false,
   244  		func(ctx context.Context) (bool, error) {
   245  			o.Logger.Debugf("deleting application registrations")
   246  			err = deleteApplicationRegistrations(ctx, o.msgraphClient, o.Logger, o.InfraID)
   247  			if err != nil {
   248  				oDataErr := extractODataError(err)
   249  				o.Logger.Debug(oDataErr)
   250  				if isAuthError(err) {
   251  					errs = append(errs, fmt.Errorf("unable to authenticate when deleting application registrations and their service principals: %w", oDataErr))
   252  					return true, err
   253  				}
   254  				return false, nil
   255  			}
   256  			return true, nil
   257  		},
   258  	)
   259  	if err != nil {
   260  		errs = append(errs, fmt.Errorf("failed to delete application registrations and their service principals: %w", err))
   261  		o.Logger.Debug(err)
   262  	}
   263  
   264  	// do not attempt to remove shared tags on azure stack hub,
   265  	// as the resource graph api is not supported there.
   266  	if o.CloudName != azure.StackCloud {
   267  		if err := removeSharedTags(
   268  			waitCtx, o.resourceGraphClient, o.tagsClient, o.InfraID, o.Session.Credentials.SubscriptionID, o.Logger,
   269  		); err != nil {
   270  			errs = append(errs, fmt.Errorf("failed to remove shared tags: %w", err))
   271  			o.Logger.Debug(err)
   272  		}
   273  	}
   274  
   275  	return nil, utilerrors.NewAggregate(errs)
   276  }
   277  
   278  func removeSharedTags(
   279  	ctx context.Context,
   280  	graphClient *armresourcegraph.Client,
   281  	tagsClient *armresources.TagsClient,
   282  	infraID, subscriptionID string,
   283  	logger logrus.FieldLogger,
   284  ) error {
   285  	tagKey := fmt.Sprintf("kubernetes.io_cluster.%s", infraID)
   286  	query := fmt.Sprintf(
   287  		"resources | where tags.['%s'] == 'shared' | project id, name, type",
   288  		tagKey,
   289  	)
   290  	results, err := graphClient.Resources(ctx,
   291  		armresourcegraph.QueryRequest{
   292  			Query: &query,
   293  			Subscriptions: []*string{
   294  				&subscriptionID,
   295  			},
   296  			Options: &armresourcegraph.QueryRequestOptions{
   297  				ResultFormat: azcoreto.Ptr(armresourcegraph.ResultFormatObjectArray),
   298  			},
   299  		},
   300  		nil,
   301  	)
   302  	if err != nil {
   303  		return fmt.Errorf("failed to query resources with shared tag: %w", err)
   304  	}
   305  
   306  	tagsParam := armresources.TagsPatchResource{
   307  		Operation: azcoreto.Ptr(armresources.TagsPatchOperationDelete),
   308  		Properties: &armresources.Tags{
   309  			Tags: map[string]*string{
   310  				tagKey: to.StringPtr("shared"),
   311  			},
   312  		},
   313  	}
   314  
   315  	m, ok := results.Data.([]any)
   316  	if !ok {
   317  		logger.Debugf("could not cast results data (of type %T) to []any, skipping", results.Data)
   318  		return nil
   319  	}
   320  
   321  	var errs []error
   322  	for _, r := range m {
   323  		items, ok := r.(map[string]any)
   324  		if !ok {
   325  			logger.Debugf("could not cast items (of type %T) to map[strin]any, skipping", items)
   326  			continue
   327  		}
   328  		resourceName, ok := items["name"].(string)
   329  		if !ok {
   330  			logger.Debugf("could not cast resource name (of type %T) to string, skipping", items["name"])
   331  			continue
   332  		}
   333  		resourceType, ok := items["type"].(string)
   334  		if !ok {
   335  			logger.Debugf("could not cast resource type (of type %T) to string, skipping", items["type"])
   336  			continue
   337  		}
   338  		resourceID, ok := items["id"].(string)
   339  		if !ok {
   340  			logger.Debugf("could not cast resource id (of type %T) to string, skipping", items["id"])
   341  			continue
   342  		}
   343  		logger := logger.WithFields(logrus.Fields{
   344  			"resource": resourceName,
   345  			"type":     resourceType,
   346  		})
   347  		logger.Debugf("removing shared tag from resource %q", resourceName)
   348  		if _, err := tagsClient.UpdateAtScope(ctx, resourceID, tagsParam, nil); err != nil {
   349  			errs = append(errs, fmt.Errorf("failed to remove shared tag from %s: %w", resourceName, err))
   350  		}
   351  		logger.Infoln("removed shared tag")
   352  	}
   353  	return utilerrors.NewAggregate(errs)
   354  }
   355  
   356  func deleteAzureStackPublicRecords(ctx context.Context, o *ClusterUninstaller) error {
   357  	ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
   358  	defer cancel()
   359  
   360  	logger := o.Logger
   361  	rgName := o.BaseDomainResourceGroupName
   362  
   363  	dnsClient := azurestackdns.NewZonesClientWithBaseURI(o.Session.Environment.ResourceManagerEndpoint, o.Session.Credentials.SubscriptionID)
   364  	dnsClient.Authorizer = o.Session.Authorizer
   365  
   366  	recordsClient := azurestackdns.NewRecordSetsClientWithBaseURI(o.Session.Environment.ResourceManagerEndpoint, o.Session.Credentials.SubscriptionID)
   367  	recordsClient.Authorizer = o.Session.Authorizer
   368  
   369  	var errs []error
   370  
   371  	zonesPage, err := dnsClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
   372  	logger.Debug(err)
   373  	if err != nil {
   374  		if zonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
   375  			logger.Debug("already deleted the AzureStack zones")
   376  			return utilerrors.NewAggregate(errs)
   377  		}
   378  		errs = append(errs, fmt.Errorf("failed to list dns zone: %w", err))
   379  		if isAuthError(err) {
   380  			return err
   381  		}
   382  	}
   383  
   384  	allZones := sets.NewString()
   385  	for ; zonesPage.NotDone(); err = zonesPage.NextWithContext(ctx) {
   386  		if err != nil {
   387  			errs = append(errs, fmt.Errorf("failed to advance to next dns zone: %w", err))
   388  			continue
   389  		}
   390  		for _, zone := range zonesPage.Values() {
   391  			allZones.Insert(to.String(zone.Name))
   392  		}
   393  	}
   394  
   395  	clusterTag := fmt.Sprintf("kubernetes.io_cluster.%s", o.InfraID)
   396  	for _, zone := range allZones.List() {
   397  		for recordPages, err := recordsClient.ListByDNSZone(ctx, rgName, zone, to.Int32Ptr(100), ""); recordPages.NotDone(); err = recordPages.NextWithContext(ctx) {
   398  			if err != nil {
   399  				return err
   400  			}
   401  			for _, record := range recordPages.Values() {
   402  				metadata := to.StringMap(record.Metadata)
   403  				_, found := metadata[clusterTag]
   404  				if found {
   405  					resp, err := recordsClient.Delete(ctx, rgName, zone, to.String(record.Name), toAzureStackRecordType(to.String(record.Type)), "")
   406  					if err != nil {
   407  						if wasNotFound(resp.Response) {
   408  							logger.WithField("record", to.String(record.Name)).Debug("already deleted")
   409  							continue
   410  						}
   411  						return fmt.Errorf("failed to delete record %s in zone %s: %w", to.String(record.Name), zone, err)
   412  					}
   413  					logger.WithField("record", to.String(record.Name)).Info("deleted")
   414  				}
   415  			}
   416  		}
   417  	}
   418  
   419  	return utilerrors.NewAggregate(errs)
   420  }
   421  
   422  func deletePublicRecords(ctx context.Context, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, privateDNSClient privatedns.PrivateZonesClient, privateRecordsClient privatedns.RecordSetsClient, logger logrus.FieldLogger, rgName string) error {
   423  	ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
   424  	defer cancel()
   425  
   426  	// collect records from private zones in rgName
   427  	var errs []error
   428  
   429  	zonesPage, err := dnsClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
   430  	if err != nil {
   431  		if zonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
   432  			logger.Debug("already deleted")
   433  			return utilerrors.NewAggregate(errs)
   434  		}
   435  		errs = append(errs, fmt.Errorf("failed to list dns zone: %w", err))
   436  		if isAuthError(err) {
   437  			return err
   438  		}
   439  	}
   440  
   441  	pageCount := 0
   442  	for ; zonesPage.NotDone(); err = zonesPage.NextWithContext(ctx) {
   443  		if err != nil {
   444  			errs = append(errs, fmt.Errorf("failed to advance to next dns zone: %w", err))
   445  			continue
   446  		}
   447  		pageCount++
   448  
   449  		for _, zone := range zonesPage.Values() {
   450  			if zone.ZoneType == dns.Private {
   451  				if err := deletePublicRecordsForZone(ctx, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
   452  					errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
   453  					if isAuthError(err) {
   454  						return err
   455  					}
   456  					continue
   457  				}
   458  			}
   459  		}
   460  	}
   461  
   462  	privateZonesPage, err := privateDNSClient.ListByResourceGroup(ctx, rgName, to.Int32Ptr(100))
   463  	if err != nil {
   464  		if privateZonesPage.Response().IsHTTPStatus(http.StatusNotFound) {
   465  			logger.Debug("already deleted")
   466  			return utilerrors.NewAggregate(errs)
   467  		}
   468  		errs = append(errs, fmt.Errorf("failed to list private dns zone: %w", err))
   469  		if isAuthError(err) {
   470  			return err
   471  		}
   472  	}
   473  
   474  	for ; privateZonesPage.NotDone(); err = privateZonesPage.NextWithContext(ctx) {
   475  		if err != nil {
   476  			errs = append(errs, fmt.Errorf("failed to advance to next dns zone: %w", err))
   477  			continue
   478  		}
   479  		pageCount++
   480  
   481  		for _, zone := range privateZonesPage.Values() {
   482  			if err := deletePublicRecordsForPrivateZone(ctx, privateRecordsClient, dnsClient, recordsClient, logger, rgName, to.String(zone.Name)); err != nil {
   483  				errs = append(errs, fmt.Errorf("failed to delete public records for %s: %w", to.String(zone.Name), err))
   484  				if isAuthError(err) {
   485  					return err
   486  				}
   487  				continue
   488  			}
   489  		}
   490  	}
   491  
   492  	if pageCount == 0 {
   493  		logger.Warn("no DNS records found: either they were already deleted or the service principal lacks permissions to list them")
   494  	}
   495  
   496  	return utilerrors.NewAggregate(errs)
   497  }
   498  
   499  func deletePublicRecordsForZone(ctx context.Context, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, logger logrus.FieldLogger, zoneGroup, zoneName string) error {
   500  	// collect all the records from the zoneName
   501  	allPrivateRecords := sets.NewString()
   502  	for recordPages, err := recordsClient.ListByDNSZone(ctx, zoneGroup, zoneName, to.Int32Ptr(100), ""); recordPages.NotDone(); err = recordPages.NextWithContext(ctx) {
   503  		if err != nil {
   504  			return err
   505  		}
   506  		for _, record := range recordPages.Values() {
   507  			if t := toRecordType(to.String(record.Type)); t == dns.SOA || t == dns.NS {
   508  				continue
   509  			}
   510  			allPrivateRecords.Insert(fmt.Sprintf("%s.%s", to.String(record.Name), zoneName))
   511  		}
   512  	}
   513  
   514  	return deletePublicRecordsMatchingZoneName(ctx, dnsClient, recordsClient, logger, allPrivateRecords, zoneName)
   515  }
   516  
   517  func deletePublicRecordsForPrivateZone(ctx context.Context, privateRecordsClient privatedns.RecordSetsClient, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, logger logrus.FieldLogger, zoneGroup, zoneName string) error {
   518  	// collect all the records from the zoneName
   519  	allPrivateRecords := sets.NewString()
   520  	for recordPages, err := privateRecordsClient.List(ctx, zoneGroup, zoneName, to.Int32Ptr(100), ""); recordPages.NotDone(); err = recordPages.NextWithContext(ctx) {
   521  		if err != nil {
   522  			return err
   523  		}
   524  		for _, record := range recordPages.Values() {
   525  			if t := toRecordType(to.String(record.Type)); t == dns.SOA || t == dns.NS {
   526  				continue
   527  			}
   528  			allPrivateRecords.Insert(fmt.Sprintf("%s.%s", to.String(record.Name), zoneName))
   529  		}
   530  	}
   531  
   532  	return deletePublicRecordsMatchingZoneName(ctx, dnsClient, recordsClient, logger, allPrivateRecords, zoneName)
   533  }
   534  
   535  func deletePublicRecordsMatchingZoneName(ctx context.Context, dnsClient dns.ZonesClient, recordsClient dns.RecordSetsClient, logger logrus.FieldLogger, privateRecords sets.String, zoneName string) error {
   536  	sharedZones, err := getSharedDNSZones(ctx, dnsClient, zoneName)
   537  	if err != nil {
   538  		return fmt.Errorf("failed to find shared zone for %s: %w", zoneName, err)
   539  	}
   540  	for _, sharedZone := range sharedZones {
   541  		logger.Debugf("removing matching private records from %s", sharedZone.Name)
   542  		for recordPages, err := recordsClient.ListByDNSZone(ctx, sharedZone.Group, sharedZone.Name, to.Int32Ptr(100), ""); recordPages.NotDone(); err = recordPages.NextWithContext(ctx) {
   543  			if err != nil {
   544  				return err
   545  			}
   546  			for _, record := range recordPages.Values() {
   547  				if privateRecords.Has(fmt.Sprintf("%s.%s", to.String(record.Name), sharedZone.Name)) {
   548  					resp, err := recordsClient.Delete(ctx, sharedZone.Group, sharedZone.Name, to.String(record.Name), toRecordType(to.String(record.Type)), "")
   549  					if err != nil {
   550  						if wasNotFound(resp.Response) {
   551  							logger.WithField("record", to.String(record.Name)).Debug("already deleted")
   552  							continue
   553  						}
   554  						return fmt.Errorf("failed to delete record %s in zone %s: %w", to.String(record.Name), sharedZone.Name, err)
   555  					}
   556  					logger.WithField("record", to.String(record.Name)).Info("deleted")
   557  				}
   558  			}
   559  		}
   560  	}
   561  	return nil
   562  }
   563  
   564  // getSharedDNSZones returns the all parent public dns zones for privZoneName in decreasing order of closeness.
   565  func getSharedDNSZones(ctx context.Context, client dns.ZonesClient, privZoneName string) ([]dnsZone, error) {
   566  	domain := privZoneName
   567  	parents := sets.NewString(domain)
   568  	for {
   569  		idx := strings.Index(domain, ".")
   570  		if idx == -1 {
   571  			break
   572  		}
   573  		if len(domain[idx+1:]) > 0 {
   574  			parents.Insert(domain[idx+1:])
   575  		}
   576  		domain = domain[idx+1:]
   577  	}
   578  
   579  	allPublicZones := []dnsZone{}
   580  	for zonesPage, err := client.List(ctx, to.Int32Ptr(100)); zonesPage.NotDone(); err = zonesPage.NextWithContext(ctx) {
   581  		if err != nil {
   582  			return nil, err
   583  		}
   584  		for _, zone := range zonesPage.Values() {
   585  			if zone.ZoneType == dns.Public && parents.Has(to.String(zone.Name)) {
   586  				allPublicZones = append(allPublicZones, dnsZone{Name: to.String(zone.Name), ID: to.String(zone.ID), Group: groupFromID(to.String(zone.ID)), Public: true})
   587  				continue
   588  			}
   589  		}
   590  	}
   591  	sort.Slice(allPublicZones, func(i, j int) bool { return len(allPublicZones[i].Name) > len(allPublicZones[j].Name) })
   592  	return allPublicZones, nil
   593  }
   594  
// dnsZone is the subset of DNS zone data that getSharedDNSZones collects for
// each matching zone.
type dnsZone struct {
	// Name and ID are copied from the listed zone; Group is derived from ID
	// with groupFromID; Public is set to true by getSharedDNSZones, which
	// only returns public zones.
	Name   string
	ID     string
	Group  string
	Public bool
}
   601  
   602  func groupFromID(id string) string {
   603  	return strings.Split(id, "/")[4]
   604  }
   605  
   606  func toRecordType(t string) dns.RecordType {
   607  	return dns.RecordType(strings.TrimPrefix(t, "Microsoft.Network/dnszones/"))
   608  }
   609  
   610  func toAzureStackRecordType(t string) azurestackdns.RecordType {
   611  	return azurestackdns.RecordType(strings.TrimPrefix(t, "Microsoft.Network/dnszones/"))
   612  }
   613  
   614  func deleteResourceGroup(ctx context.Context, client resources.GroupsClient, logger logrus.FieldLogger, name string) error {
   615  	logger = logger.WithField("resource group", name)
   616  	ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
   617  	defer cancel()
   618  
   619  	delFuture, err := client.Delete(ctx, name)
   620  	if err == nil {
   621  		err = delFuture.WaitForCompletionRef(ctx, client.Client)
   622  	}
   623  	if err != nil {
   624  		if isNotFoundError(err) {
   625  			logger.Debug("already deleted")
   626  			return nil
   627  		}
   628  		return fmt.Errorf("failed to delete %s: %w", name, err)
   629  	}
   630  	logger.Info("deleted")
   631  	return nil
   632  }
   633  
   634  func wasNotFound(resp *http.Response) bool {
   635  	return resp != nil && resp.StatusCode == http.StatusNotFound
   636  }
   637  
   638  func isNotFoundError(err error) bool {
   639  	if err == nil {
   640  		return false
   641  	}
   642  
   643  	var dErr autorest.DetailedError
   644  	if errors.As(err, &dErr) {
   645  		if dErr.StatusCode == http.StatusNotFound {
   646  			return true
   647  		}
   648  
   649  		if dErr.StatusCode == 0 {
   650  			var serviceErr *azureenv.ServiceError
   651  			if errors.As(dErr.Original, &serviceErr) && strings.HasSuffix(serviceErr.Code, "NotFound") {
   652  				return true
   653  			}
   654  		}
   655  	}
   656  
   657  	return false
   658  }
   659  
   660  func isAuthError(err error) bool {
   661  	if err == nil {
   662  		return false
   663  	}
   664  
   665  	var dErr autorest.DetailedError
   666  	if errors.As(err, &dErr) {
   667  		switch statusCode := dErr.StatusCode.(type) {
   668  		case int:
   669  			if statusCode >= 400 && statusCode <= 403 {
   670  				return true
   671  			}
   672  		}
   673  	}
   674  
   675  	// https://github.com/Azure/azure-sdk-for-go/issues/16736
   676  	// https://github.com/Azure/azure-sdk-for-go/blob/sdk/azidentity/v1.1.0/sdk/azidentity/errors.go#L36
   677  	var authErr *azidentity.AuthenticationFailedError
   678  	if errors.As(err, &authErr) {
   679  		if authErr.RawResponse.StatusCode >= 400 && authErr.RawResponse.StatusCode <= 403 {
   680  			return true
   681  		}
   682  	}
   683  
   684  	return false
   685  }
   686  
   687  func isResourceGroupBlockedError(err error) bool {
   688  	if err == nil {
   689  		return false
   690  	}
   691  
   692  	var dErr autorest.DetailedError
   693  	if errors.As(err, &dErr) {
   694  		switch statusCode := dErr.StatusCode.(type) {
   695  		case int:
   696  			if statusCode == 409 {
   697  				return true
   698  			}
   699  		}
   700  	}
   701  
   702  	return false
   703  }
   704  
   705  // Errors returned by the new Azure SDK are not very helpful. They just say
   706  // "error status code received from the API". This function unwraps the
   707  // ODataErr, if possible, and returns a new error with a more friendly "code:
   708  // message" format.
   709  func extractODataError(err error) error {
   710  	var oDataErr *odataerrors.ODataError
   711  	if errors.As(err, &oDataErr) {
   712  		if typed := oDataErr.GetError(); typed != nil {
   713  			return fmt.Errorf("%s: %s", *typed.GetCode(), *typed.GetMessage())
   714  		}
   715  	}
   716  	return err
   717  }
   718  
   719  func deleteApplicationRegistrations(ctx context.Context, graphClient *msgraphsdk.GraphServiceClient, logger logrus.FieldLogger, infraID string) error {
   720  	tag := fmt.Sprintf("kubernetes.io_cluster.%s=owned", infraID)
   721  	servicePrincipals, err := getServicePrincipalsByTag(ctx, graphClient, tag, infraID)
   722  	if err != nil {
   723  		return fmt.Errorf("failed to gather list of Service Principals by tag: %w", err)
   724  	}
   725  	// msgraphsdk can return a `nil` response even if no errors occurred
   726  	if servicePrincipals == nil {
   727  		logger.Debug("Empty response from API when listing Service Principals by tag")
   728  		return nil
   729  	}
   730  
   731  	var errorList []error
   732  	for _, sp := range servicePrincipals {
   733  		appID := *sp.GetAppId()
   734  		logger := logger.WithField("appID", appID)
   735  
   736  		filter := fmt.Sprintf("appId eq '%s'", appID)
   737  		listQuery := applications.ApplicationsRequestBuilderGetRequestConfiguration{
   738  			QueryParameters: &applications.ApplicationsRequestBuilderGetQueryParameters{
   739  				Filter: &filter,
   740  			},
   741  		}
   742  
   743  		resp, err := graphClient.Applications().Get(ctx, &listQuery)
   744  		if err != nil {
   745  			errorList = append(errorList, err)
   746  			continue
   747  		}
   748  		// msgraphsdk can return a `nil` response even if no errors occurred
   749  		if resp == nil {
   750  			logger.Debugf("Empty response getting Application from Service Principal %s", *sp.GetDisplayName())
   751  			continue
   752  		}
   753  		apps := resp.GetValue()
   754  		if len(apps) != 1 {
   755  			err = fmt.Errorf("should have received only a single matching AppID, received %d instead", len(apps))
   756  			errorList = append(errorList, err)
   757  		}
   758  
   759  		err = graphClient.ApplicationsById(*apps[0].GetId()).Delete(ctx, nil)
   760  		if err != nil {
   761  			errorList = append(errorList, err)
   762  		}
   763  		logger.Info("Deleted")
   764  	}
   765  
   766  	return utilerrors.NewAggregate(errorList)
   767  }
   768  
   769  func getServicePrincipalsByTag(ctx context.Context, graphClient *msgraphsdk.GraphServiceClient, matchTag, infraID string) ([]models.ServicePrincipalable, error) {
   770  	filter := fmt.Sprintf("startswith(displayName, '%s') and tags/any(s:s eq '%s')", infraID, matchTag)
   771  	listQuery := serviceprincipals.ServicePrincipalsRequestBuilderGetRequestConfiguration{
   772  		QueryParameters: &serviceprincipals.ServicePrincipalsRequestBuilderGetQueryParameters{
   773  			Filter: &filter,
   774  		},
   775  	}
   776  	resp, err := graphClient.ServicePrincipals().Get(ctx, &listQuery)
   777  	if err != nil || resp == nil {
   778  		return nil, err
   779  	}
   780  	return resp.GetValue(), nil
   781  }