sigs.k8s.io/cluster-api-provider-azure@v1.14.3/test/e2e/azure_privatecluster.go (about)

     1  //go:build e2e
     2  // +build e2e
     3  
     4  /*
     5  Copyright 2020 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2e
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os"
    26  	"path/filepath"
    27  
    28  	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
    29  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
    30  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
    31  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
    32  	. "github.com/onsi/ginkgo/v2"
    33  	. "github.com/onsi/gomega"
    34  	"github.com/pkg/errors"
    35  	corev1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	"k8s.io/utils/ptr"
    38  	azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure"
    39  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    40  	"sigs.k8s.io/cluster-api/test/framework"
    41  	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
    42  	"sigs.k8s.io/cluster-api/util"
    43  	"sigs.k8s.io/controller-runtime/pkg/client"
    44  )
    45  
    46  // AzurePrivateClusterSpecInput is the input for AzurePrivateClusterSpec.
    47  type AzurePrivateClusterSpecInput struct {
    48  	BootstrapClusterProxy framework.ClusterProxy
    49  	Namespace             *corev1.Namespace
    50  	ClusterName           string
    51  	ClusterctlConfigPath  string
    52  	E2EConfig             *clusterctl.E2EConfig
    53  	ArtifactFolder        string
    54  	SkipCleanup           bool
    55  	CancelWatches         context.CancelFunc
    56  }
    57  
    58  // AzurePrivateClusterSpec implements a test that creates a workload cluster with a private API endpoint.
    59  func AzurePrivateClusterSpec(ctx context.Context, inputGetter func() AzurePrivateClusterSpecInput) {
    60  	var (
    61  		specName            = "azure-private-cluster"
    62  		input               AzurePrivateClusterSpecInput
    63  		publicClusterProxy  framework.ClusterProxy
    64  		publicNamespace     *corev1.Namespace
    65  		publicCancelWatches context.CancelFunc
    66  		cluster             *clusterv1.Cluster
    67  		clusterName         string
    68  	)
    69  
    70  	input = inputGetter()
    71  	Expect(input).NotTo(BeNil())
    72  	Expect(input.BootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName)
    73  	Expect(input.Namespace).NotTo(BeNil(), "Invalid argument. input.Namespace can't be nil when calling %s spec", specName)
    74  	By("creating a Kubernetes client to the workload cluster")
    75  	publicClusterProxy = input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName)
    76  
    77  	Byf("Creating a namespace for hosting the %s test spec", specName)
    78  	Logf("starting to create namespace for hosting the %s test spec", specName)
    79  	publicNamespace, publicCancelWatches = framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{
    80  		Creator:   publicClusterProxy.GetClient(),
    81  		ClientSet: publicClusterProxy.GetClientSet(),
    82  		Name:      input.Namespace.Name,
    83  		LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.ClusterName),
    84  	})
    85  
    86  	Expect(publicNamespace).NotTo(BeNil())
    87  	Expect(publicCancelWatches).NotTo(BeNil())
    88  
    89  	By("Initializing the workload cluster")
    90  	clusterctl.InitManagementClusterAndWatchControllerLogs(ctx, clusterctl.InitManagementClusterAndWatchControllerLogsInput{
    91  		ClusterProxy:            publicClusterProxy,
    92  		ClusterctlConfigPath:    input.ClusterctlConfigPath,
    93  		InfrastructureProviders: input.E2EConfig.InfrastructureProviders(),
    94  		AddonProviders:          input.E2EConfig.AddonProviders(),
    95  		LogFolder:               filepath.Join(input.ArtifactFolder, "clusters", input.ClusterName),
    96  	}, input.E2EConfig.GetIntervals(specName, "wait-controllers")...)
    97  
    98  	By("Ensure public API server is stable before creating private cluster")
    99  	Consistently(func() error {
   100  		ns := &corev1.Namespace{}
   101  		return publicClusterProxy.GetClient().Get(ctx, client.ObjectKey{Name: kubesystem}, ns)
   102  	}, "5s", "100ms").Should(BeNil(), "Failed to assert public API server stability")
   103  
   104  	// **************
   105  	// Get the Client ID for the user assigned identity
   106  	subscriptionID := os.Getenv(AzureSubscriptionID)
   107  	identityRG, ok := os.LookupEnv(AzureIdentityResourceGroup)
   108  	if !ok {
   109  		identityRG = "capz-ci"
   110  	}
   111  	userID, ok := os.LookupEnv(AzureUserIdentity)
   112  	if !ok {
   113  		userID = "cloud-provider-user-identity"
   114  	}
   115  	resourceID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.ManagedIdentity/userAssignedIdentities/%s", subscriptionID, identityRG, userID)
   116  	os.Setenv("UAMI_CLIENT_ID", getClientIDforMSI(resourceID))
   117  
   118  	os.Setenv("CLUSTER_IDENTITY_NAME", "cluster-identity-user-assigned")
   119  	os.Setenv("CLUSTER_IDENTITY_NAMESPACE", input.Namespace.Name)
   120  	// *************
   121  
   122  	By("Creating a private workload cluster")
   123  	clusterName = fmt.Sprintf("capz-e2e-%s-%s", util.RandomString(6), "private")
   124  	Expect(os.Setenv(AzureVNetName, clusterName+"-vnet")).To(Succeed())
   125  	Expect(os.Setenv(AzureVNetCidr, "10.255.0.0/16")).To(Succeed())
   126  	Expect(os.Setenv(AzureInternalLBIP, "10.255.0.100")).To(Succeed())
   127  	Expect(os.Setenv(AzureCPSubnetCidr, "10.255.0.0/24")).To(Succeed())
   128  	Expect(os.Setenv(AzureNodeSubnetCidr, "10.255.1.0/24")).To(Succeed())
   129  	Expect(os.Setenv(AzureBastionSubnetCidr, "10.255.255.224/27")).To(Succeed())
   130  	result := &clusterctl.ApplyClusterTemplateAndWaitResult{}
   131  
   132  	clusterctl.ApplyClusterTemplateAndWait(ctx, createApplyClusterTemplateInput(
   133  		specName,
   134  		withClusterProxy(publicClusterProxy),
   135  		withFlavor("private"),
   136  		withNamespace(input.Namespace.Name),
   137  		withClusterName(clusterName),
   138  		withControlPlaneMachineCount(3),
   139  		withWorkerMachineCount(1),
   140  		withClusterInterval(specName, "wait-private-cluster"),
   141  		withControlPlaneInterval(specName, "wait-control-plane-ha"),
   142  	), result)
   143  	cluster = result.Cluster
   144  
   145  	Expect(cluster).NotTo(BeNil())
   146  
   147  	defer func() {
   148  		// Delete the private cluster, so that all of the Azure resources will be cleaned up when the public
   149  		// cluster is deleted at the end of the test. If we don't delete this cluster, the Azure resource delete
   150  		// verification will fail.
   151  		cleanInput := cleanupInput{
   152  			SpecName:               specName,
   153  			Cluster:                cluster,
   154  			ClusterProxy:           publicClusterProxy,
   155  			Namespace:              input.Namespace,
   156  			CancelWatches:          publicCancelWatches,
   157  			IntervalsGetter:        e2eConfig.GetIntervals,
   158  			SkipCleanup:            input.SkipCleanup,
   159  			SkipLogCollection:      skipLogCollection,
   160  			ArtifactFolder:         input.ArtifactFolder,
   161  			SkipResourceGroupCheck: true, // We don't expect the resource group to be deleted since the private cluster does not own the resource group.
   162  		}
   163  		dumpSpecResourcesAndCleanup(ctx, cleanInput)
   164  	}()
   165  
   166  	// Check that azure bastion is provisioned successfully.
   167  	{
   168  		By("verifying the Azure Bastion Host was created successfully")
   169  		cred, err := azidentity.NewDefaultAzureCredential(nil)
   170  		Expect(err).NotTo(HaveOccurred())
   171  
   172  		azureBastionClient, err := armnetwork.NewBastionHostsClient(getSubscriptionID(Default), cred, nil)
   173  		Expect(err).NotTo(HaveOccurred())
   174  
   175  		groupName := os.Getenv(AzureResourceGroup)
   176  		azureBastionName := fmt.Sprintf("%s-azure-bastion", clusterName)
   177  
   178  		backoff := wait.Backoff{
   179  			Duration: retryBackoffInitialDuration,
   180  			Factor:   retryBackoffFactor,
   181  			Jitter:   retryBackoffJitter,
   182  			Steps:    retryBackoffSteps,
   183  		}
   184  		retryFn := func() (bool, error) {
   185  			resp, err := azureBastionClient.Get(ctx, groupName, azureBastionName, nil)
   186  			if err != nil {
   187  				return false, err
   188  			}
   189  
   190  			bastion := resp.BastionHost
   191  			switch ptr.Deref(bastion.Properties.ProvisioningState, "") {
   192  			case armnetwork.ProvisioningStateSucceeded:
   193  				return true, nil
   194  			case armnetwork.ProvisioningStateUpdating:
   195  				// Wait for operation to complete.
   196  				return false, nil
   197  			default:
   198  				return false, errors.New(fmt.Sprintf("Azure Bastion provisioning failed with state: %q", ptr.Deref(bastion.Properties.ProvisioningState, "(nil)")))
   199  			}
   200  		}
   201  		err = wait.ExponentialBackoff(backoff, retryFn)
   202  
   203  		Expect(err).NotTo(HaveOccurred())
   204  	}
   205  }
   206  
   207  // SetupExistingVNet creates a resource group and a VNet to be used by a workload cluster.
   208  func SetupExistingVNet(ctx context.Context, vnetCidr string, cpSubnetCidrs, nodeSubnetCidrs map[string]string, bastionSubnetName, bastionSubnetCidr string) func() {
   209  	By("creating Azure clients with the workload cluster's subscription")
   210  	subscriptionID := getSubscriptionID(Default)
   211  	cred, err := azidentity.NewDefaultAzureCredential(nil)
   212  	Expect(err).NotTo(HaveOccurred())
   213  
   214  	groupClient, err := armresources.NewResourceGroupsClient(subscriptionID, cred, nil)
   215  	Expect(err).NotTo(HaveOccurred())
   216  	vnetClient, err := armnetwork.NewVirtualNetworksClient(subscriptionID, cred, nil)
   217  	Expect(err).NotTo(HaveOccurred())
   218  	nsgClient, err := armnetwork.NewSecurityGroupsClient(subscriptionID, cred, nil)
   219  	Expect(err).NotTo(HaveOccurred())
   220  	routetableClient, err := armnetwork.NewRouteTablesClient(subscriptionID, cred, nil)
   221  	Expect(err).NotTo(HaveOccurred())
   222  
   223  	By("creating a resource group")
   224  	groupName := os.Getenv(AzureCustomVnetResourceGroup)
   225  	_, err = groupClient.CreateOrUpdate(ctx, groupName, armresources.ResourceGroup{
   226  		Location: ptr.To(os.Getenv(AzureLocation)),
   227  		Tags: map[string]*string{
   228  			"jobName":           ptr.To(os.Getenv(JobName)),
   229  			"creationTimestamp": ptr.To(os.Getenv(Timestamp)),
   230  		},
   231  	}, nil)
   232  	Expect(err).NotTo(HaveOccurred())
   233  
   234  	By("creating a network security group")
   235  	nsgName := "control-plane-nsg"
   236  	securityRules := []*armnetwork.SecurityRule{
   237  		{
   238  			Name: ptr.To("allow_ssh"),
   239  			Properties: &armnetwork.SecurityRulePropertiesFormat{
   240  				Description:              ptr.To("Allow SSH"),
   241  				Priority:                 ptr.To[int32](2200),
   242  				Protocol:                 ptr.To(armnetwork.SecurityRuleProtocolTCP),
   243  				Access:                   ptr.To(armnetwork.SecurityRuleAccessAllow),
   244  				Direction:                ptr.To(armnetwork.SecurityRuleDirectionInbound),
   245  				SourceAddressPrefix:      ptr.To("*"),
   246  				SourcePortRange:          ptr.To("*"),
   247  				DestinationAddressPrefix: ptr.To("*"),
   248  				DestinationPortRange:     ptr.To("22"),
   249  			},
   250  		},
   251  		{
   252  			Name: ptr.To("allow_apiserver"),
   253  			Properties: &armnetwork.SecurityRulePropertiesFormat{
   254  				Description:              ptr.To("Allow API Server"),
   255  				SourcePortRange:          ptr.To("*"),
   256  				DestinationPortRange:     ptr.To("6443"),
   257  				SourceAddressPrefix:      ptr.To("*"),
   258  				DestinationAddressPrefix: ptr.To("*"),
   259  				Protocol:                 ptr.To(armnetwork.SecurityRuleProtocolTCP),
   260  				Access:                   ptr.To(armnetwork.SecurityRuleAccessAllow),
   261  				Direction:                ptr.To(armnetwork.SecurityRuleDirectionInbound),
   262  				Priority:                 ptr.To[int32](2201),
   263  			},
   264  		},
   265  	}
   266  	nsgPoller, err := nsgClient.BeginCreateOrUpdate(ctx, groupName, nsgName, armnetwork.SecurityGroup{
   267  		Location: ptr.To(os.Getenv(AzureLocation)),
   268  		Properties: &armnetwork.SecurityGroupPropertiesFormat{
   269  			SecurityRules: securityRules,
   270  		},
   271  	}, nil)
   272  	Expect(err).NotTo(HaveOccurred())
   273  	_, err = nsgPoller.PollUntilDone(ctx, nil)
   274  	Expect(err).NotTo(HaveOccurred())
   275  
   276  	By("creating a node security group")
   277  	nsgNodeName := "node-nsg"
   278  	securityRulesNode := []*armnetwork.SecurityRule{}
   279  	nsgNodePoller, err := nsgClient.BeginCreateOrUpdate(ctx, groupName, nsgNodeName, armnetwork.SecurityGroup{
   280  		Location: ptr.To(os.Getenv(AzureLocation)),
   281  		Properties: &armnetwork.SecurityGroupPropertiesFormat{
   282  			SecurityRules: securityRulesNode,
   283  		},
   284  	}, nil)
   285  	Expect(err).NotTo(HaveOccurred())
   286  	_, err = nsgNodePoller.PollUntilDone(ctx, nil)
   287  	Expect(err).NotTo(HaveOccurred())
   288  
   289  	By("creating a node routetable")
   290  	routeTableName := "node-routetable"
   291  	routeTable := armnetwork.RouteTable{
   292  		Location:   ptr.To(os.Getenv(AzureLocation)),
   293  		Properties: &armnetwork.RouteTablePropertiesFormat{},
   294  	}
   295  	routetablePoller, err := routetableClient.BeginCreateOrUpdate(ctx, groupName, routeTableName, routeTable, nil)
   296  	Expect(err).NotTo(HaveOccurred())
   297  	_, err = routetablePoller.PollUntilDone(ctx, nil)
   298  	Expect(err).NotTo(HaveOccurred())
   299  
   300  	By("creating a virtual network")
   301  	var subnets []*armnetwork.Subnet
   302  	for name, cidr := range cpSubnetCidrs {
   303  		subnets = append(subnets, &armnetwork.Subnet{
   304  			Properties: &armnetwork.SubnetPropertiesFormat{
   305  				AddressPrefix: ptr.To(cidr),
   306  				NetworkSecurityGroup: &armnetwork.SecurityGroup{
   307  					ID: ptr.To(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkSecurityGroups/%s", subscriptionID, groupName, nsgName)),
   308  				},
   309  			},
   310  			Name: ptr.To(name),
   311  		})
   312  	}
   313  	for name, cidr := range nodeSubnetCidrs {
   314  		subnets = append(subnets, &armnetwork.Subnet{
   315  			Properties: &armnetwork.SubnetPropertiesFormat{
   316  				AddressPrefix: ptr.To(cidr),
   317  				NetworkSecurityGroup: &armnetwork.SecurityGroup{
   318  					ID: ptr.To(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/networkSecurityGroups/%s", subscriptionID, groupName, nsgNodeName)),
   319  				},
   320  				RouteTable: &armnetwork.RouteTable{
   321  					ID: ptr.To(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/routeTables/%s", subscriptionID, groupName, routeTableName)),
   322  				},
   323  			},
   324  			Name: ptr.To(name),
   325  		})
   326  	}
   327  
   328  	// Create the AzureBastion subnet.
   329  	subnets = append(subnets, &armnetwork.Subnet{
   330  		Properties: &armnetwork.SubnetPropertiesFormat{
   331  			AddressPrefix: ptr.To(bastionSubnetCidr),
   332  		},
   333  		Name: ptr.To(bastionSubnetName),
   334  	})
   335  
   336  	vnetPoller, err := vnetClient.BeginCreateOrUpdate(ctx, groupName, os.Getenv(AzureCustomVNetName), armnetwork.VirtualNetwork{
   337  		Location: ptr.To(os.Getenv(AzureLocation)),
   338  		Properties: &armnetwork.VirtualNetworkPropertiesFormat{
   339  			AddressSpace: &armnetwork.AddressSpace{
   340  				AddressPrefixes: []*string{ptr.To(vnetCidr)},
   341  			},
   342  			Subnets: subnets,
   343  		},
   344  	}, nil)
   345  	if err != nil {
   346  		fmt.Print(err.Error())
   347  	}
   348  	Expect(err).NotTo(HaveOccurred())
   349  	_, err = vnetPoller.PollUntilDone(ctx, nil)
   350  	Expect(err).NotTo(HaveOccurred())
   351  
   352  	return func() {
   353  		Logf("deleting an existing virtual network %q", os.Getenv(AzureCustomVNetName))
   354  		vPoller, err := vnetClient.BeginDelete(ctx, groupName, os.Getenv(AzureCustomVNetName), nil)
   355  		Expect(err).NotTo(HaveOccurred())
   356  		_, err = vPoller.PollUntilDone(ctx, nil)
   357  		Expect(err).NotTo(HaveOccurred())
   358  
   359  		Logf("deleting an existing route table %q", routeTableName)
   360  		rtPoller, err := routetableClient.BeginDelete(ctx, groupName, routeTableName, nil)
   361  		Expect(err).NotTo(HaveOccurred())
   362  		_, err = rtPoller.PollUntilDone(ctx, nil)
   363  		Expect(err).NotTo(HaveOccurred())
   364  
   365  		Logf("deleting an existing network security group %q", nsgNodeName)
   366  		nsgPoller, err := nsgClient.BeginDelete(ctx, groupName, nsgNodeName, nil)
   367  		Expect(err).NotTo(HaveOccurred())
   368  		_, err = nsgPoller.PollUntilDone(ctx, nil)
   369  		Expect(err).NotTo(HaveOccurred())
   370  
   371  		Logf("deleting an existing network security group %q", nsgName)
   372  		nsgPoller, err = nsgClient.BeginDelete(ctx, groupName, nsgName, nil)
   373  		Expect(err).NotTo(HaveOccurred())
   374  		_, err = nsgPoller.PollUntilDone(ctx, nil)
   375  		Expect(err).NotTo(HaveOccurred())
   376  
   377  		Logf("verifying the existing resource group %q is empty", groupName)
   378  		cred, err := azidentity.NewDefaultAzureCredential(nil)
   379  		Expect(err).NotTo(HaveOccurred())
   380  		resClient, err := armresources.NewClient(getSubscriptionID(Default), cred, nil)
   381  		Expect(err).NotTo(HaveOccurred())
   382  
   383  		Eventually(func() ([]*armresources.GenericResourceExpanded, error) {
   384  			var foundResources []*armresources.GenericResourceExpanded
   385  			opts := armresources.ClientListByResourceGroupOptions{
   386  				Expand: ptr.To("provisioningState"),
   387  				Top:    ptr.To[int32](10),
   388  			}
   389  			pager := resClient.NewListByResourceGroupPager(groupName, &opts)
   390  			for pager.More() {
   391  				page, err := pager.NextPage(ctx)
   392  				if err != nil {
   393  					return nil, err
   394  				}
   395  				foundResources = append(foundResources, page.Value...)
   396  			}
   397  			return foundResources, nil
   398  			// add some tolerance for Azure caching of resource group resources
   399  		}, deleteOperationTimeout, retryableOperationTimeout).Should(BeEmpty(), "Expect the manually created resource group is empty after removing the manually created resources.")
   400  
   401  		Logf("deleting the existing resource group %q", groupName)
   402  		grpPoller, err := groupClient.BeginDelete(ctx, groupName, nil)
   403  		Expect(err).NotTo(HaveOccurred())
   404  		_, err = grpPoller.PollUntilDone(ctx, nil)
   405  		Expect(err).NotTo(HaveOccurred())
   406  	}
   407  }
   408  
   409  // getClientIDforMSI fetches the client ID of a user assigned identity.
   410  func getClientIDforMSI(resourceID string) string {
   411  	subscriptionID := getSubscriptionID(Default)
   412  	cred, err := azidentity.NewDefaultAzureCredential(nil)
   413  	Expect(err).NotTo(HaveOccurred())
   414  
   415  	msiClient, err := armmsi.NewUserAssignedIdentitiesClient(subscriptionID, cred, nil)
   416  	Expect(err).NotTo(HaveOccurred())
   417  
   418  	parsed, err := azureutil.ParseResourceID(resourceID)
   419  	Expect(err).NotTo(HaveOccurred())
   420  
   421  	resp, err := msiClient.Get(context.TODO(), parsed.ResourceGroupName, parsed.Name, nil)
   422  	Expect(err).NotTo(HaveOccurred())
   423  
   424  	return *resp.Properties.ClientID
   425  }