sigs.k8s.io/cluster-api-provider-azure@v1.14.3/azure/services/scalesets/scalesets.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scalesets
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
    24  	"github.com/pkg/errors"
    25  	azprovider "sigs.k8s.io/cloud-provider-azure/pkg/provider"
    26  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    27  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    28  	"sigs.k8s.io/cluster-api-provider-azure/azure/converters"
    29  	"sigs.k8s.io/cluster-api-provider-azure/azure/services/async"
    30  	"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
    31  	azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure"
    32  	"sigs.k8s.io/cluster-api-provider-azure/util/slice"
    33  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    34  )
    35  
    36  const serviceName = "scalesets"
    37  
    38  type (
    39  	// ScaleSetScope defines the scope interface for a scale sets service.
    40  	ScaleSetScope interface {
    41  		azure.ClusterDescriber
    42  		azure.AsyncStatusUpdater
    43  		ScaleSetSpec(context.Context) azure.ResourceSpecGetter
    44  		VMSSExtensionSpecs() []azure.ResourceSpecGetter
    45  		SetAnnotation(string, string)
    46  		SetProviderID(string)
    47  		SetVMSSState(*azure.VMSS)
    48  		ReconcileReplicas(context.Context, *azure.VMSS) error
    49  	}
    50  
    51  	// Service provides operations on Azure resources.
    52  	Service struct {
    53  		Scope ScaleSetScope
    54  		Client
    55  		resourceSKUCache *resourceskus.Cache
    56  		async.Reconciler
    57  	}
    58  )
    59  
    60  // New creates a new service.
    61  func New(scope ScaleSetScope, skuCache *resourceskus.Cache) (*Service, error) {
    62  	client, err := NewClient(scope, scope.DefaultedAzureCallTimeout())
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	return &Service{
    67  		Reconciler: async.New[armcompute.VirtualMachineScaleSetsClientCreateOrUpdateResponse,
    68  			armcompute.VirtualMachineScaleSetsClientDeleteResponse](scope, client, client),
    69  		Client:           client,
    70  		Scope:            scope,
    71  		resourceSKUCache: skuCache,
    72  	}, nil
    73  }
    74  
    75  // Name returns the service name.
    76  func (s *Service) Name() string {
    77  	return serviceName
    78  }
    79  
    80  // Reconcile idempotently gets, creates, and updates a scale set.
    81  func (s *Service) Reconcile(ctx context.Context) (retErr error) {
    82  	ctx, _, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.Reconcile")
    83  	defer done()
    84  
    85  	ctx, cancel := context.WithTimeout(ctx, s.Scope.DefaultedAzureServiceReconcileTimeout())
    86  	defer cancel()
    87  
    88  	if err := s.validateSpec(ctx); err != nil {
    89  		// do as much early validation as possible to limit calls to Azure
    90  		return err
    91  	}
    92  
    93  	spec := s.Scope.ScaleSetSpec(ctx)
    94  	scaleSetSpec, ok := spec.(*ScaleSetSpec)
    95  	if !ok {
    96  		return errors.Errorf("%T is not of type ScaleSetSpec", spec)
    97  	}
    98  
    99  	_, err := s.Client.Get(ctx, spec)
   100  	if err == nil {
   101  		// We can only get the existing instances if the VMSS already exists
   102  		scaleSetSpec.VMSSInstances, err = s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName())
   103  		if err != nil {
   104  			err = errors.Wrapf(err, "failed to get existing VMSS instances")
   105  			s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
   106  			return err
   107  		}
   108  	} else if !azure.ResourceNotFound(err) {
   109  		return errors.Wrapf(err, "failed to get existing VMSS")
   110  	}
   111  
   112  	result, err := s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName)
   113  	s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
   114  
   115  	if err == nil && result != nil {
   116  		vmss, ok := result.(armcompute.VirtualMachineScaleSet)
   117  		if !ok {
   118  			return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result)
   119  		}
   120  
   121  		fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances)
   122  		if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil {
   123  			return errors.Wrap(err, "unable to reconcile VMSS replicas")
   124  		}
   125  
   126  		// Transform the VMSS resource representation to conform to the cloud-provider-azure representation
   127  		providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID)
   128  		if err != nil {
   129  			return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID)
   130  		}
   131  		s.Scope.SetProviderID(providerID)
   132  		s.Scope.SetVMSSState(&fetchedVMSS)
   133  	}
   134  
   135  	return err
   136  }
   137  
   138  // Delete deletes a scale set asynchronously. Delete sends a DELETE request to Azure and if accepted without error,
   139  // the VMSS will be considered deleted. The actual delete in Azure may take longer, but should eventually complete.
   140  func (s *Service) Delete(ctx context.Context) error {
   141  	ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.Delete")
   142  	defer done()
   143  
   144  	ctx, cancel := context.WithTimeout(ctx, s.Scope.DefaultedAzureServiceReconcileTimeout())
   145  	defer cancel()
   146  
   147  	scaleSetSpec := s.Scope.ScaleSetSpec(ctx)
   148  
   149  	defer func() {
   150  		fetchedVMSS, err := s.getVirtualMachineScaleSet(ctx, scaleSetSpec)
   151  		if err != nil && !azure.ResourceNotFound(err) {
   152  			log.Error(err, "failed to get vmss in deferred update")
   153  		}
   154  
   155  		if fetchedVMSS != nil {
   156  			s.Scope.SetVMSSState(fetchedVMSS)
   157  		}
   158  	}()
   159  
   160  	err := s.DeleteResource(ctx, scaleSetSpec, serviceName)
   161  
   162  	s.Scope.UpdateDeleteStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
   163  
   164  	return err
   165  }
   166  
   167  func (s *Service) validateSpec(ctx context.Context) error {
   168  	ctx, _, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.validateSpec")
   169  	defer done()
   170  
   171  	spec := s.Scope.ScaleSetSpec(ctx)
   172  	scaleSetSpec, ok := spec.(*ScaleSetSpec)
   173  	if !ok {
   174  		return errors.Errorf("%T is not a ScaleSetSpec", spec)
   175  	}
   176  
   177  	sku, err := s.resourceSKUCache.Get(ctx, scaleSetSpec.Size, resourceskus.VirtualMachines)
   178  	if err != nil {
   179  		return errors.Wrapf(err, "failed to get SKU %s in compute api", scaleSetSpec.Size)
   180  	}
   181  
   182  	// Checking if the requested VM size has at least 2 vCPUS
   183  	vCPUCapability, err := sku.HasCapabilityWithCapacity(resourceskus.VCPUs, resourceskus.MinimumVCPUS)
   184  	if err != nil {
   185  		return azure.WithTerminalError(errors.Wrap(err, "failed to validate the vCPU capability"))
   186  	}
   187  
   188  	if !vCPUCapability {
   189  		return azure.WithTerminalError(errors.New("vm size should be bigger or equal to at least 2 vCPUs"))
   190  	}
   191  
   192  	// Checking if the requested VM size has at least 2 Gi of memory
   193  	MemoryCapability, err := sku.HasCapabilityWithCapacity(resourceskus.MemoryGB, resourceskus.MinimumMemory)
   194  	if err != nil {
   195  		return azure.WithTerminalError(errors.Wrap(err, "failed to validate the memory capability"))
   196  	}
   197  
   198  	if !MemoryCapability {
   199  		return azure.WithTerminalError(errors.New("vm memory should be bigger or equal to at least 2Gi"))
   200  	}
   201  
   202  	// enable ephemeral OS
   203  	if scaleSetSpec.OSDisk.DiffDiskSettings != nil && !sku.HasCapability(resourceskus.EphemeralOSDisk) {
   204  		return azure.WithTerminalError(fmt.Errorf("vm size %s does not support ephemeral os. select a different vm size or disable ephemeral os", scaleSetSpec.Size))
   205  	}
   206  
   207  	if scaleSetSpec.SecurityProfile != nil && !sku.HasCapability(resourceskus.EncryptionAtHost) {
   208  		return azure.WithTerminalError(errors.Errorf("encryption at host is not supported for VM type %s", scaleSetSpec.Size))
   209  	}
   210  
   211  	// Fetch location and zone to check for their support of ultra disks.
   212  	zones, err := s.resourceSKUCache.GetZones(ctx, scaleSetSpec.Location)
   213  	if err != nil {
   214  		return azure.WithTerminalError(errors.Wrapf(err, "failed to get the zones for location %s", scaleSetSpec.Location))
   215  	}
   216  
   217  	for _, zone := range zones {
   218  		hasLocationCapability := sku.HasLocationCapability(resourceskus.UltraSSDAvailable, scaleSetSpec.Location, zone)
   219  		err := fmt.Errorf("vm size %s does not support ultra disks in location %s. select a different vm size or disable ultra disks", scaleSetSpec.Size, scaleSetSpec.Location)
   220  
   221  		// Check support for ultra disks as data disks.
   222  		for _, disks := range scaleSetSpec.DataDisks {
   223  			if disks.ManagedDisk != nil &&
   224  				disks.ManagedDisk.StorageAccountType == string(armcompute.StorageAccountTypesUltraSSDLRS) &&
   225  				!hasLocationCapability {
   226  				return azure.WithTerminalError(err)
   227  			}
   228  		}
   229  		// Check support for ultra disks as persistent volumes.
   230  		if scaleSetSpec.AdditionalCapabilities != nil && scaleSetSpec.AdditionalCapabilities.UltraSSDEnabled != nil {
   231  			if *scaleSetSpec.AdditionalCapabilities.UltraSSDEnabled &&
   232  				!hasLocationCapability {
   233  				return azure.WithTerminalError(err)
   234  			}
   235  		}
   236  	}
   237  
   238  	// Validate DiagnosticProfile spec
   239  	if scaleSetSpec.DiagnosticsProfile != nil && scaleSetSpec.DiagnosticsProfile.Boot != nil {
   240  		if scaleSetSpec.DiagnosticsProfile.Boot.StorageAccountType == infrav1.UserManagedDiagnosticsStorage {
   241  			if scaleSetSpec.DiagnosticsProfile.Boot.UserManaged == nil {
   242  				return azure.WithTerminalError(fmt.Errorf("userManaged must be specified when storageAccountType is '%s'", infrav1.UserManagedDiagnosticsStorage))
   243  			} else if scaleSetSpec.DiagnosticsProfile.Boot.UserManaged.StorageAccountURI == "" {
   244  				return azure.WithTerminalError(fmt.Errorf("storageAccountURI cannot be empty when storageAccountType is '%s'", infrav1.UserManagedDiagnosticsStorage))
   245  			}
   246  		}
   247  
   248  		possibleStorageAccountTypeValues := []string{
   249  			string(infrav1.DisabledDiagnosticsStorage),
   250  			string(infrav1.ManagedDiagnosticsStorage),
   251  			string(infrav1.UserManagedDiagnosticsStorage),
   252  		}
   253  
   254  		if !slice.Contains(possibleStorageAccountTypeValues, string(scaleSetSpec.DiagnosticsProfile.Boot.StorageAccountType)) {
   255  			return azure.WithTerminalError(fmt.Errorf("invalid storageAccountType: %s. Allowed values are %v",
   256  				scaleSetSpec.DiagnosticsProfile.Boot.StorageAccountType, possibleStorageAccountTypeValues))
   257  		}
   258  	}
   259  
   260  	// Checking if selected availability zones are available selected VM type in location
   261  	azsInLocation, err := s.resourceSKUCache.GetZonesWithVMSize(ctx, scaleSetSpec.Size, scaleSetSpec.Location)
   262  	if err != nil {
   263  		return errors.Wrapf(err, "failed to get zones for VM type %s in location %s", scaleSetSpec.Size, scaleSetSpec.Location)
   264  	}
   265  
   266  	for _, az := range scaleSetSpec.FailureDomains {
   267  		if !slice.Contains(azsInLocation, az) {
   268  			return azure.WithTerminalError(errors.Errorf("availability zone %s is not available for VM type %s in location %s", az, scaleSetSpec.Size, scaleSetSpec.Location))
   269  		}
   270  	}
   271  
   272  	return nil
   273  }
   274  
   275  // getVirtualMachineScaleSet provides information about a Virtual Machine Scale Set and its instances.
   276  func (s *Service) getVirtualMachineScaleSet(ctx context.Context, spec azure.ResourceSpecGetter) (*azure.VMSS, error) {
   277  	ctx, _, done := tele.StartSpanWithLogger(ctx, "scalesets.Service.getVirtualMachineScaleSet")
   278  	defer done()
   279  
   280  	vmssResult, err := s.Client.Get(ctx, spec)
   281  	if err != nil {
   282  		return nil, errors.Wrap(err, "failed to get existing VMSS")
   283  	}
   284  	vmss, ok := vmssResult.(armcompute.VirtualMachineScaleSet)
   285  	if !ok {
   286  		return nil, errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", vmssResult)
   287  	}
   288  
   289  	vmssInstances, err := s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName())
   290  	if err != nil {
   291  		return nil, errors.Wrap(err, "failed to list instances")
   292  	}
   293  
   294  	result := converters.SDKToVMSS(vmss, vmssInstances)
   295  
   296  	return &result, nil
   297  }
   298  
   299  // IsManaged returns always returns true as CAPZ does not support BYO scale set.
   300  func (s *Service) IsManaged(ctx context.Context) (bool, error) {
   301  	return true, nil
   302  }