sigs.k8s.io/cluster-api-provider-azure@v1.14.3/azure/scope/machinepool.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scope

import (
	"context"
	"crypto/sha256"
	"encoding/base64"
	"fmt"
	"io"
	"strings"

	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"
	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/azure"
	machinepool "sigs.k8s.io/cluster-api-provider-azure/azure/scope/strategies/machinepool_deployments"
	"sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus"
	"sigs.k8s.io/cluster-api-provider-azure/azure/services/roleassignments"
	"sigs.k8s.io/cluster-api-provider-azure/azure/services/scalesets"
	"sigs.k8s.io/cluster-api-provider-azure/azure/services/virtualmachineimages"
	infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
	azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure"
	"sigs.k8s.io/cluster-api-provider-azure/util/futures"
	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	capierrors "sigs.k8s.io/cluster-api/errors"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/labels/format"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// ScalesetsServiceName is the name of the scalesets service.
// TODO: move this to scalesets.go once we remove the usage in this package,
// added here to avoid a circular dependency.
const ScalesetsServiceName = "scalesets"

type (
	// MachinePoolScopeParams defines the input parameters used to create a new MachinePoolScope.
	MachinePoolScopeParams struct {
		Client           client.Client
		MachinePool      *expv1.MachinePool
		AzureMachinePool *infrav1exp.AzureMachinePool
		ClusterScope     azure.ClusterScoper
		Cache            *MachinePoolCache
	}

	// MachinePoolScope defines a scope around a machine pool and its cluster.
	MachinePoolScope struct {
		azure.ClusterScoper
		AzureMachinePool           *infrav1exp.AzureMachinePool
		MachinePool                *expv1.MachinePool
		client                     client.Client
		patchHelper                *patch.Helper
		capiMachinePoolPatchHelper *patch.Helper
		vmssState                  *azure.VMSS
		cache                      *MachinePoolCache
	}

	// NodeStatus represents the status of a Kubernetes node.
	NodeStatus struct {
		Ready   bool
		Version string
	}

	// MachinePoolCache stores common machine pool information so we don't have to hit the API
	// multiple times within the same reconcile loop.
	MachinePoolCache struct {
		BootstrapData           string
		HasBootstrapDataChanges bool
		VMImage                 *infrav1.Image
		VMSKU                   resourceskus.SKU
		MaxSurge                int
	}
)

// NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters.
// This is meant to be called for each reconcile iteration.
func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) {
	if params.Client == nil {
		return nil, errors.New("client is required when creating a MachinePoolScope")
	}

	if params.MachinePool == nil {
		return nil, errors.New("machine pool is required when creating a MachinePoolScope")
	}

	if params.AzureMachinePool == nil {
		return nil, errors.New("azure machine pool is required when creating a MachinePoolScope")
	}

	helper, err := patch.NewHelper(params.AzureMachinePool, params.Client)
	if err != nil {
		return nil, errors.Wrap(err, "failed to init patch helper")
	}

	capiMachinePoolPatchHelper, err := patch.NewHelper(params.MachinePool, params.Client)
	if err != nil {
		return nil, errors.Wrap(err, "failed to init capi patch helper")
	}

	return &MachinePoolScope{
		client:                     params.Client,
		MachinePool:                params.MachinePool,
		AzureMachinePool:           params.AzureMachinePool,
		patchHelper:                helper,
		capiMachinePoolPatchHelper: capiMachinePoolPatchHelper,
		ClusterScoper:              params.ClusterScope,
	}, nil
}

// InitMachinePoolCache sets cached information about the machine pool to be used in the scope.
func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.InitMachinePoolCache")
	defer done()

	if m.cache == nil {
		var err error
		m.cache = &MachinePoolCache{}

		m.cache.BootstrapData, err = m.GetBootstrapData(ctx)
		if err != nil {
			return err
		}

		m.cache.HasBootstrapDataChanges, err = m.HasBootstrapDataChanges(ctx)
		if err != nil {
			return err
		}

		m.cache.VMImage, err = m.GetVMImage(ctx)
		if err != nil {
			return err
		}
		m.SaveVMImageToStatus(m.cache.VMImage)

		m.cache.MaxSurge, err = m.MaxSurge()
		if err != nil {
			return err
		}

		skuCache, err := resourceskus.GetCache(m, m.Location())
		if err != nil {
			return err
		}

		m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
		if err != nil {
			return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize)
		}
	}

	return nil
}
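// A minimal usage sketch, assuming a hypothetical reconciler that has already
// fetched the MachinePool, the AzureMachinePool, and a cluster scope (error
// handling abbreviated; none of the surrounding names are defined in this
// file). The scope is created once per reconcile iteration and the cache is
// initialized before any service consumes it:
//
//	machinePoolScope, err := NewMachinePoolScope(MachinePoolScopeParams{
//		Client:           r.Client,
//		MachinePool:      machinePool,      // *expv1.MachinePool
//		AzureMachinePool: azureMachinePool, // *infrav1exp.AzureMachinePool
//		ClusterScope:     clusterScope,     // azure.ClusterScoper
//	})
//	if err != nil {
//		return reconcile.Result{}, err
//	}
//	if err := machinePoolScope.InitMachinePoolCache(ctx); err != nil {
//		return reconcile.Result{}, err
//	}
//	// Close persists both the AzureMachinePool and the CAPI MachinePool.
//	defer func() {
//		if err := machinePoolScope.Close(ctx); err != nil && reterr == nil {
//			reterr = err
//		}
//	}()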
// ScaleSetSpec returns the scale set spec.
func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecGetter {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.ScaleSetSpec")
	defer done()

	spec := &scalesets.ScaleSetSpec{
		Name:                         m.Name(),
		ResourceGroup:                m.NodeResourceGroup(),
		Size:                         m.AzureMachinePool.Spec.Template.VMSize,
		Capacity:                     int64(ptr.Deref[int32](m.MachinePool.Spec.Replicas, 0)),
		SSHKeyData:                   m.AzureMachinePool.Spec.Template.SSHPublicKey,
		OSDisk:                       m.AzureMachinePool.Spec.Template.OSDisk,
		DataDisks:                    m.AzureMachinePool.Spec.Template.DataDisks,
		SubnetName:                   m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName,
		VNetName:                     m.Vnet().Name,
		VNetResourceGroup:            m.Vnet().ResourceGroup,
		PublicLBName:                 m.OutboundLBName(infrav1.Node),
		PublicLBAddressPoolName:      m.OutboundPoolName(infrav1.Node),
		AcceleratedNetworking:        m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].AcceleratedNetworking,
		Identity:                     m.AzureMachinePool.Spec.Identity,
		UserAssignedIdentities:       m.AzureMachinePool.Spec.UserAssignedIdentities,
		DiagnosticsProfile:           m.AzureMachinePool.Spec.Template.Diagnostics,
		SecurityProfile:              m.AzureMachinePool.Spec.Template.SecurityProfile,
		SpotVMOptions:                m.AzureMachinePool.Spec.Template.SpotVMOptions,
		FailureDomains:               m.MachinePool.Spec.FailureDomains,
		TerminateNotificationTimeout: m.AzureMachinePool.Spec.Template.TerminateNotificationTimeout,
		NetworkInterfaces:            m.AzureMachinePool.Spec.Template.NetworkInterfaces,
		IPv6Enabled:                  m.IsIPv6Enabled(),
		OrchestrationMode:            m.AzureMachinePool.Spec.OrchestrationMode,
		Location:                     m.AzureMachinePool.Spec.Location,
		SubscriptionID:               m.SubscriptionID(),
		HasReplicasExternallyManaged: m.HasReplicasExternallyManaged(ctx),
		ClusterName:                  m.ClusterName(),
		AdditionalTags:               m.AzureMachinePool.Spec.AdditionalTags,
		PlatformFaultDomainCount:     m.AzureMachinePool.Spec.PlatformFaultDomainCount,
		ZoneBalance:                  m.AzureMachinePool.Spec.ZoneBalance,
	}

	if m.AzureMachinePool.Spec.ZoneBalance != nil && len(m.MachinePool.Spec.FailureDomains) <= 1 {
		log.V(4).Info("zone balance is enabled but one or fewer failure domains are specified, zone balance will be disabled")
		spec.ZoneBalance = nil
	}

	if m.cache != nil {
		if m.HasReplicasExternallyManaged(ctx) {
			spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
			log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
		}
		spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs()
		spec.SKU = m.cache.VMSKU
		spec.VMImage = m.cache.VMImage
		spec.BootstrapData = m.cache.BootstrapData
		spec.MaxSurge = m.cache.MaxSurge
	} else {
		log.V(4).Info("machinepool cache is nil, which is only expected when deleting a machinepool")
	}

	return spec
}

// Name returns the Azure Machine Pool Name.
func (m *MachinePoolScope) Name() string {
	// Windows machine pool names cannot be longer than 9 characters.
	if m.AzureMachinePool.Spec.Template.OSDisk.OSType == azure.WindowsOS && len(m.AzureMachinePool.Name) > 9 {
		return "win-" + m.AzureMachinePool.Name[len(m.AzureMachinePool.Name)-5:]
	}
	return m.AzureMachinePool.Name
}
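// Worked example for the truncation above (illustrative name): a Windows pool
// named "windows-pool-0" is longer than 9 characters, so Name() returns
// "win-" plus the last five characters of the name:
//
//	m.AzureMachinePool.Name = "windows-pool-0" // 14 characters
//	m.Name()                                   // "win-ool-0" (exactly 9 characters)
//
// Linux pools are returned unchanged regardless of length.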
// SetInfrastructureMachineKind sets the infrastructure machine kind in the status if it is not set already, returning
// `true` if the status was updated. This supports MachinePool Machines.
func (m *MachinePoolScope) SetInfrastructureMachineKind() bool {
	if m.AzureMachinePool.Status.InfrastructureMachineKind != infrav1exp.AzureMachinePoolMachineKind {
		m.AzureMachinePool.Status.InfrastructureMachineKind = infrav1exp.AzureMachinePoolMachineKind

		return true
	}

	return false
}

// ProviderID returns the resource name parsed from the AzureMachinePool's Spec.ProviderID.
func (m *MachinePoolScope) ProviderID() string {
	resourceID, err := azureutil.ParseResourceID(m.AzureMachinePool.Spec.ProviderID)
	if err != nil {
		return ""
	}
	return resourceID.Name
}

// SetProviderID sets the AzureMachinePool providerID in spec.
func (m *MachinePoolScope) SetProviderID(v string) {
	m.AzureMachinePool.Spec.ProviderID = v
}

// SystemAssignedIdentityName returns the role assignment name for the system assigned identity.
func (m *MachinePoolScope) SystemAssignedIdentityName() string {
	if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil {
		return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.Name
	}
	return ""
}

// SystemAssignedIdentityScope returns the scope for the system assigned identity.
func (m *MachinePoolScope) SystemAssignedIdentityScope() string {
	if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil {
		return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.Scope
	}
	return ""
}

// SystemAssignedIdentityDefinitionID returns the role definition ID for the system assigned identity.
func (m *MachinePoolScope) SystemAssignedIdentityDefinitionID() string {
	if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil {
		return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.DefinitionID
	}
	return ""
}

// ProvisioningState returns the AzureMachinePool provisioning state.
func (m *MachinePoolScope) ProvisioningState() infrav1.ProvisioningState {
	if m.AzureMachinePool.Status.ProvisioningState != nil {
		return *m.AzureMachinePool.Status.ProvisioningState
	}
	return ""
}

// SetVMSSState updates the machine pool scope with the current state of the VMSS.
func (m *MachinePoolScope) SetVMSSState(vmssState *azure.VMSS) {
	m.vmssState = vmssState
}

// NeedsRequeue returns true if any machines are not on the latest model or the VMSS is not in a terminal provisioning
// state.
func (m *MachinePoolScope) NeedsRequeue() bool {
	state := m.AzureMachinePool.Status.ProvisioningState
	if m.vmssState == nil {
		return state != nil && infrav1.IsTerminalProvisioningState(*state)
	}

	if !m.vmssState.HasLatestModelAppliedToAll() {
		return true
	}

	desiredMatchesActual := len(m.vmssState.Instances) == int(m.DesiredReplicas())
	return !(state != nil && infrav1.IsTerminalProvisioningState(*state) && desiredMatchesActual)
}

// DesiredReplicas returns the replica count of the MachinePool, or 0 if its replicas field is nil.
func (m MachinePoolScope) DesiredReplicas() int32 {
	return ptr.Deref[int32](m.MachinePool.Spec.Replicas, 0)
}
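// Illustrative example for ProviderID (made-up resource ID): given
//
//	Spec.ProviderID = "azure:///subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-pool"
//
// ProviderID() returns "my-pool", the final name segment of the ARM resource
// ID, and returns "" if the ID cannot be parsed.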
// MaxSurge returns the number of machines to surge, or 0 if the deployment strategy does not support surge.
func (m MachinePoolScope) MaxSurge() (int, error) {
	if surger, ok := m.getDeploymentStrategy().(machinepool.Surger); ok {
		surgeCount, err := surger.Surge(int(m.DesiredReplicas()))
		if err != nil {
			return 0, errors.Wrap(err, "failed to calculate surge for the machine pool")
		}

		return surgeCount, nil
	}

	return 0, nil
}

// updateReplicasAndProviderIDs ties the Azure VMSS instance data and the Node status data together to build and update
// the AzureMachinePool replica count and providerIDList.
func (m *MachinePoolScope) updateReplicasAndProviderIDs(ctx context.Context) error {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.updateReplicasAndProviderIDs")
	defer done()

	machines, err := m.GetMachinePoolMachines(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to get machine pool machines")
	}

	var readyReplicas int32
	providerIDs := make([]string, len(machines))
	for i, machine := range machines {
		if machine.Status.Ready {
			readyReplicas++
		}
		providerIDs[i] = machine.Spec.ProviderID
	}

	m.AzureMachinePool.Status.Replicas = readyReplicas
	m.AzureMachinePool.Spec.ProviderIDList = providerIDs
	return nil
}

func (m *MachinePoolScope) getMachinePoolMachineLabels() map[string]string {
	return map[string]string{
		clusterv1.ClusterNameLabel:      m.ClusterName(),
		infrav1exp.MachinePoolNameLabel: m.AzureMachinePool.Name,
		clusterv1.MachinePoolNameLabel:  format.MustFormatValue(m.MachinePool.Name),
		m.ClusterName():                 string(infrav1.ResourceLifecycleOwned),
	}
}

// GetMachinePoolMachines returns the list of AzureMachinePoolMachines associated with this AzureMachinePool.
func (m *MachinePoolScope) GetMachinePoolMachines(ctx context.Context) ([]infrav1exp.AzureMachinePoolMachine, error) {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetMachinePoolMachines")
	defer done()

	labels := m.getMachinePoolMachineLabels()
	ampml := &infrav1exp.AzureMachinePoolMachineList{}
	if err := m.client.List(ctx, ampml, client.InNamespace(m.AzureMachinePool.Namespace), client.MatchingLabels(labels)); err != nil {
		return nil, errors.Wrap(err, "failed to list AzureMachinePoolMachines")
	}

	return ampml.Items, nil
}
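// A small consumer sketch (assumes a *MachinePoolScope named scope): list the
// pool's machines and inspect the same fields that
// updateReplicasAndProviderIDs aggregates above.
//
//	machines, err := scope.GetMachinePoolMachines(ctx)
//	if err != nil {
//		return err
//	}
//	for _, ampm := range machines {
//		fmt.Println(ampm.Spec.ProviderID, ampm.Status.Ready)
//	}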
func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.applyAzureMachinePoolMachines")
	defer done()

	if m.vmssState == nil {
		return nil
	}

	ampms, err := m.GetMachinePoolMachines(ctx)
	if err != nil {
		return err
	}

	existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms))
	for _, machine := range ampms {
		existingMachinesByProviderID[machine.Spec.ProviderID] = machine
	}

	// determine which machines need to be created to reflect the current state in Azure
	azureMachinesByProviderID := m.vmssState.InstancesByProviderID(m.AzureMachinePool.Spec.OrchestrationMode)
	for key, val := range azureMachinesByProviderID {
		if _, ok := existingMachinesByProviderID[key]; !ok {
			log.V(4).Info("creating AzureMachinePoolMachine", "providerID", key)
			if err := m.createMachine(ctx, val); err != nil {
				return errors.Wrap(err, "failed creating AzureMachinePoolMachine")
			}
			continue
		}
	}

	deleted := false
	// Delete MachinePool Machines for instances that no longer exist in Azure, i.e. were deleted out-of-band.
	for key, ampm := range existingMachinesByProviderID {
		ampm := ampm
		if _, ok := azureMachinesByProviderID[key]; !ok {
			deleted = true
			log.V(4).Info("deleting AzureMachinePoolMachine because it no longer exists in the VMSS", "providerID", key)
			delete(existingMachinesByProviderID, key)
			if err := m.DeleteMachine(ctx, ampm); err != nil {
				return errors.Wrap(err, "failed deleting AzureMachinePoolMachine no longer existing in Azure")
			}
		}
	}

	if deleted {
		log.V(4).Info("exiting early due to finding AzureMachinePoolMachine(s) that were deleted because they no longer exist in the VMSS")
		// exit early to be less greedy about delete
		return nil
	}

	if futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.PatchFuture) ||
		futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.PutFuture) ||
		futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.DeleteFuture) {
		log.V(4).Info("exiting early due to an in-progress long-running operation on the ScaleSet")
		// exit early to be less greedy about delete
		return nil
	}

	// when replicas are externally managed, we do not want to scale down manually since that is handled by the external scaler.
	if m.HasReplicasExternallyManaged(ctx) {
		log.V(4).Info("exiting early due to replicas being externally managed")
		return nil
	}

	deleteSelector := m.getDeploymentStrategy()
	if deleteSelector == nil {
		log.V(4).Info("cannot select AzureMachinePoolMachines to delete because no deployment strategy is specified")
		return nil
	}

	// Select Machines to delete to lower the replica count
	toDelete, err := deleteSelector.SelectMachinesToDelete(ctx, m.DesiredReplicas(), existingMachinesByProviderID)
	if err != nil {
		return errors.Wrap(err, "failed selecting AzureMachinePoolMachine(s) to delete")
	}

	// Delete MachinePool Machines as a part of scaling down
	for i := range toDelete {
		ampm := toDelete[i]
		log.Info("deleting selected AzureMachinePoolMachine", "providerID", ampm.Spec.ProviderID)
		if err := m.DeleteMachine(ctx, ampm); err != nil {
			return errors.Wrap(err, "failed deleting AzureMachinePoolMachine to reduce replica count")
		}
	}

	log.V(4).Info("done reconciling AzureMachinePoolMachine(s)")
	return nil
}
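// For context, the strategy consumed above via getDeploymentStrategy() is
// declared on the AzureMachinePool spec. A rolling-update sketch (field names
// as defined by the exp v1beta1 API; values illustrative; intstr is
// k8s.io/apimachinery/pkg/util/intstr):
//
//	amp.Spec.Strategy = infrav1exp.AzureMachinePoolDeploymentStrategy{
//		Type: infrav1exp.RollingUpdateAzureMachinePoolDeploymentStrategyType,
//		RollingUpdate: &infrav1exp.MachineRollingUpdateDeployment{
//			MaxSurge:       ptr.To(intstr.FromInt(1)),
//			MaxUnavailable: ptr.To(intstr.FromString("25%")),
//			DeletePolicy:   infrav1exp.OldestDeletePolicyType,
//		},
//	}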
func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSSVM) error {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.createMachine")
	defer done()

	parsed, err := azureutil.ParseResourceID(machine.ID)
	if err != nil {
		return errors.Wrapf(err, "failed to parse resource id %q", machine.ID)
	}
	instanceID := strings.ReplaceAll(parsed.Name, "_", "-")

	ampm := infrav1exp.AzureMachinePoolMachine{
		ObjectMeta: metav1.ObjectMeta{
			Name:      m.AzureMachinePool.Name + "-" + instanceID,
			Namespace: m.AzureMachinePool.Namespace,
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion:         infrav1exp.GroupVersion.String(),
					Kind:               infrav1.AzureMachinePoolKind,
					Name:               m.AzureMachinePool.Name,
					BlockOwnerDeletion: ptr.To(true),
					UID:                m.AzureMachinePool.UID,
				},
			},
			Annotations: map[string]string{},
		},
		Spec: infrav1exp.AzureMachinePoolMachineSpec{
			ProviderID: machine.ProviderID(),
			InstanceID: machine.InstanceID,
		},
	}

	labels := m.getMachinePoolMachineLabels()
	ampm.Labels = labels

	controllerutil.AddFinalizer(&ampm, infrav1exp.AzureMachinePoolMachineFinalizer)
	conditions.MarkFalse(&ampm, infrav1.VMRunningCondition, string(infrav1.Creating), clusterv1.ConditionSeverityInfo, "")
	if err := m.client.Create(ctx, &ampm); err != nil {
		return errors.Wrapf(err, "failed creating AzureMachinePoolMachine %s in AzureMachinePool %s", machine.ID, m.AzureMachinePool.Name)
	}

	return nil
}

// DeleteMachine deletes an AzureMachinePoolMachine by fetching its owner Machine and deleting it. This ensures that the node cordon/drain happens before deleting the infrastructure.
func (m *MachinePoolScope) DeleteMachine(ctx context.Context, ampm infrav1exp.AzureMachinePoolMachine) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.DeleteMachine")
	defer done()

	machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta)
	if err != nil {
		return errors.Wrapf(err, "error getting owner Machine for AzureMachinePoolMachine %s/%s", ampm.Namespace, ampm.Name)
	}
	if machine == nil {
		log.V(2).Info("No owner Machine exists for AzureMachinePoolMachine", "machine", klog.KObj(&ampm))
		// If the AzureMachinePoolMachine does not have an owner Machine, do not attempt to delete the AzureMachinePoolMachine as the MachinePool controller will create the
		// Machine and we want to let it catch up. If we are too hasty to delete, that introduces a race condition where the AzureMachinePoolMachine could be deleted
		// just as the Machine comes online.

		// In the case where the MachinePool is being deleted and the Machine will never come online, the AzureMachinePoolMachine will be deleted via its ownerRef to the
		// AzureMachinePool, so that is covered as well.

		return nil
	}

	if err := m.client.Delete(ctx, machine); err != nil {
		return errors.Wrapf(err, "failed to delete Machine %s for AzureMachinePoolMachine %s in MachinePool %s", machine.Name, ampm.Name, m.MachinePool.Name)
	}

	return nil
}

// SetLongRunningOperationState will set the future on the AzureMachinePool status to allow the resource to continue
// in the next reconciliation.
func (m *MachinePoolScope) SetLongRunningOperationState(future *infrav1.Future) {
	futures.Set(m.AzureMachinePool, future)
}

// GetLongRunningOperationState will get the future on the AzureMachinePool status.
func (m *MachinePoolScope) GetLongRunningOperationState(name, service, futureType string) *infrav1.Future {
	return futures.Get(m.AzureMachinePool, name, service, futureType)
}

// DeleteLongRunningOperationState will delete the future from the AzureMachinePool status.
func (m *MachinePoolScope) DeleteLongRunningOperationState(name, service, futureType string) {
	futures.Delete(m.AzureMachinePool, name, service, futureType)
}
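// A minimal sketch of how the scale set service typically uses these
// accessors (future contents illustrative): check for an in-flight operation
// before issuing a new one, record it, and clear it once it completes.
//
//	if f := scope.GetLongRunningOperationState("my-pool", ScalesetsServiceName, infrav1.PutFuture); f != nil {
//		// resume polling the in-flight VMSS PUT instead of re-issuing it
//	}
//	scope.SetLongRunningOperationState(&infrav1.Future{
//		Type:          infrav1.PutFuture,
//		ServiceName:   ScalesetsServiceName,
//		Name:          "my-pool",
//		ResourceGroup: "my-rg",
//		Data:          "...", // opaque resume token returned by the SDK
//	})
//	scope.DeleteLongRunningOperationState("my-pool", ScalesetsServiceName, infrav1.PutFuture)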
// setProvisioningStateAndConditions sets the AzureMachinePool provisioning state and conditions.
func (m *MachinePoolScope) setProvisioningStateAndConditions(v infrav1.ProvisioningState) {
	m.AzureMachinePool.Status.ProvisioningState = &v
	switch {
	case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas == m.AzureMachinePool.Status.Replicas:
		// vmss is provisioned with enough ready replicas
		conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetRunningCondition)
		conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetModelUpdatedCondition)
		conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition)
		m.SetReady()
	case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas != m.AzureMachinePool.Status.Replicas:
		// not enough ready replicas or too many; we must still be scaling up or down
		updatingState := infrav1.Updating
		m.AzureMachinePool.Status.ProvisioningState = &updatingState
		if *m.MachinePool.Spec.Replicas > m.AzureMachinePool.Status.Replicas {
			conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition, infrav1.ScaleSetScaleUpReason, clusterv1.ConditionSeverityInfo, "")
		} else {
			conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition, infrav1.ScaleSetScaleDownReason, clusterv1.ConditionSeverityInfo, "")
		}
		m.SetNotReady()
	case v == infrav1.Updating:
		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetModelUpdatedCondition, infrav1.ScaleSetModelOutOfDateReason, clusterv1.ConditionSeverityInfo, "")
		m.SetNotReady()
	case v == infrav1.Creating:
		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetCreatingReason, clusterv1.ConditionSeverityInfo, "")
		m.SetNotReady()
	case v == infrav1.Deleting:
		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetDeletingReason, clusterv1.ConditionSeverityInfo, "")
		m.SetNotReady()
	default:
		conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, string(v), clusterv1.ConditionSeverityInfo, "")
		m.SetNotReady()
	}
}

// SetReady sets the AzureMachinePool Ready Status to true.
func (m *MachinePoolScope) SetReady() {
	m.AzureMachinePool.Status.Ready = true
}

// SetNotReady sets the AzureMachinePool Ready Status to false.
func (m *MachinePoolScope) SetNotReady() {
	m.AzureMachinePool.Status.Ready = false
}

// SetFailureMessage sets the AzureMachinePool status failure message.
func (m *MachinePoolScope) SetFailureMessage(v error) {
	m.AzureMachinePool.Status.FailureMessage = ptr.To(v.Error())
}

// SetFailureReason sets the AzureMachinePool status failure reason.
func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) {
	m.AzureMachinePool.Status.FailureReason = &v
}
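// Summary of the mapping implemented above (conditions not mentioned in a
// case are left untouched):
//
//	Succeeded, replicas match   -> Running=True, ModelUpdated=True, DesiredReplicas=True, Ready
//	Succeeded, replicas differ  -> DesiredReplicas=False (ScaleUp/ScaleDown), state forced to Updating, NotReady
//	Updating                    -> ModelUpdated=False (ModelOutOfDate), NotReady
//	Creating                    -> Running=False (Creating), NotReady
//	Deleting                    -> Running=False (Deleting), NotReady
//	any other state             -> Running=False (state string as reason), NotReady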
// AdditionalTags merges AdditionalTags from the scope's AzureCluster and AzureMachinePool. If the same key is present in both,
// the value from AzureMachinePool takes precedence.
func (m *MachinePoolScope) AdditionalTags() infrav1.Tags {
	tags := make(infrav1.Tags)
	// Start with the cluster-wide tags...
	tags.Merge(m.ClusterScoper.AdditionalTags())
	// ... and merge in the Machine Pool's tags.
	tags.Merge(m.AzureMachinePool.Spec.AdditionalTags)
	// Set the cloud provider tag
	tags[infrav1.ClusterAzureCloudProviderTagKey(m.ClusterName())] = string(infrav1.ResourceLifecycleOwned)

	return tags
}

// SetAnnotation sets a key value annotation on the AzureMachinePool.
func (m *MachinePoolScope) SetAnnotation(key, value string) {
	if m.AzureMachinePool.Annotations == nil {
		m.AzureMachinePool.Annotations = map[string]string{}
	}
	m.AzureMachinePool.Annotations[key] = value
}

// PatchObject persists the AzureMachinePool spec and status.
func (m *MachinePoolScope) PatchObject(ctx context.Context) error {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.PatchObject")
	defer done()

	conditions.SetSummary(m.AzureMachinePool)
	return m.patchHelper.Patch(
		ctx,
		m.AzureMachinePool,
		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
			clusterv1.ReadyCondition,
			infrav1.BootstrapSucceededCondition,
			infrav1.ScaleSetDesiredReplicasCondition,
			infrav1.ScaleSetModelUpdatedCondition,
			infrav1.ScaleSetRunningCondition,
		}})
}

// Close the MachinePoolScope by updating the AzureMachinePool spec and AzureMachinePool status.
func (m *MachinePoolScope) Close(ctx context.Context) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.Close")
	defer done()

	if m.vmssState != nil {
		if err := m.applyAzureMachinePoolMachines(ctx); err != nil {
			log.Error(err, "failed to apply changes to the AzureMachinePoolMachines")
			return errors.Wrap(err, "failed to apply changes to AzureMachinePoolMachines")
		}

		m.setProvisioningStateAndConditions(m.vmssState.State)
		if err := m.updateReplicasAndProviderIDs(ctx); err != nil {
			return errors.Wrap(err, "failed to update replicas and providerIDs")
		}
		if m.HasReplicasExternallyManaged(ctx) {
			if err := m.updateCustomDataHash(ctx); err != nil {
				// ignore errors calculating the custom data hash since it's not absolutely crucial.
				log.V(4).Error(err, "unable to update custom data hash, ignoring.")
			}
		}
	}

	if err := m.PatchObject(ctx); err != nil {
		return errors.Wrap(err, "unable to patch AzureMachinePool")
	}
	if err := m.PatchCAPIMachinePoolObject(ctx); err != nil {
		return errors.Wrap(err, "unable to patch CAPI MachinePool")
	}
	return nil
}
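// Illustrative merge for AdditionalTags (hypothetical tag values):
// cluster-wide tags {"env": "dev", "team": "a"} combined with pool tags
// {"team": "b"} produce
//
//	infrav1.Tags{
//		"env":  "dev",
//		"team": "b", // the AzureMachinePool value wins on conflict
//		infrav1.ClusterAzureCloudProviderTagKey("my-cluster"): string(infrav1.ResourceLifecycleOwned),
//	}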
// GetBootstrapData returns the bootstrap data from the secret in the MachinePool's bootstrap.dataSecretName.
func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error) {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetBootstrapData")
	defer done()

	dataSecretName := m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName
	if dataSecretName == nil {
		return "", errors.New("error retrieving bootstrap data: linked MachinePool Spec's bootstrap.dataSecretName is nil")
	}
	secret := &corev1.Secret{}
	key := types.NamespacedName{Namespace: m.AzureMachinePool.Namespace, Name: *dataSecretName}
	if err := m.client.Get(ctx, key, secret); err != nil {
		return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AzureMachinePool %s/%s", m.AzureMachinePool.Namespace, m.Name())
	}

	value, ok := secret.Data["value"]
	if !ok {
		return "", errors.New("error retrieving bootstrap data: secret value key is missing")
	}
	return base64.StdEncoding.EncodeToString(value), nil
}

// calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data.
func (m *MachinePoolScope) calculateBootstrapDataHash(_ context.Context) (string, error) {
	bootstrapData := m.cache.BootstrapData
	h := sha256.New()
	n, err := io.WriteString(h, bootstrapData)
	if err != nil || n == 0 {
		return "", fmt.Errorf("unable to write custom data (bytes written: %d): %w", n, err)
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// HasBootstrapDataChanges calculates the sha256 hash of the bootstrap data and compares it with the hash stored in the
// AzureMachinePool's custom data hash annotation.
func (m *MachinePoolScope) HasBootstrapDataChanges(ctx context.Context) (bool, error) {
	newHash, err := m.calculateBootstrapDataHash(ctx)
	if err != nil {
		return false, err
	}
	return m.AzureMachinePool.GetAnnotations()[azure.CustomDataHashAnnotation] != newHash, nil
}

// updateCustomDataHash calculates the sha256 hash of the bootstrap data and saves it in the AzureMachinePool's
// custom data hash annotation.
func (m *MachinePoolScope) updateCustomDataHash(ctx context.Context) error {
	newHash, err := m.calculateBootstrapDataHash(ctx)
	if err != nil {
		return err
	}
	m.SetAnnotation(azure.CustomDataHashAnnotation, newHash)
	return nil
}
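// End-to-end sketch of the hash flow above (payload illustrative):
// GetBootstrapData base64-encodes the secret's "value" key, and the hash is
// computed over that encoded string, so the annotation tracks exactly the
// custom data sent to the VMSS.
//
//	encoded := base64.StdEncoding.EncodeToString([]byte("#cloud-config\n..."))
//	sum := sha256.Sum256([]byte(encoded))
//	fmt.Printf("%x\n", sum) // value compared against azure.CustomDataHashAnnotation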
// GetVMImage picks an image from the AzureMachinePool configuration, or uses a default one.
func (m *MachinePoolScope) GetVMImage(ctx context.Context) (*infrav1.Image, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetVMImage")
	defer done()

	// Use custom Marketplace image, Image ID or a Shared Image Gallery image if provided
	if m.AzureMachinePool.Spec.Template.Image != nil {
		return m.AzureMachinePool.Spec.Template.Image, nil
	}

	var (
		err          error
		defaultImage *infrav1.Image
	)

	svc, err := virtualmachineimages.New(m)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create virtualmachineimages service")
	}

	if m.AzureMachinePool.Spec.Template.OSDisk.OSType == azure.WindowsOS {
		runtime := m.AzureMachinePool.Annotations["runtime"]
		windowsServerVersion := m.AzureMachinePool.Annotations["windowsServerVersion"]
		log.V(4).Info("No image specified for machine, using default Windows Image", "machine", m.MachinePool.GetName(), "runtime", runtime, "windowsServerVersion", windowsServerVersion)
		defaultImage, err = svc.GetDefaultWindowsImage(ctx, m.Location(), ptr.Deref(m.MachinePool.Spec.Template.Spec.Version, ""), runtime, windowsServerVersion)
	} else {
		defaultImage, err = svc.GetDefaultUbuntuImage(ctx, m.Location(), ptr.Deref(m.MachinePool.Spec.Template.Spec.Version, ""))
	}

	if err != nil {
		return defaultImage, errors.Wrap(err, "failed to get default OS image")
	}

	return defaultImage, nil
}

// SaveVMImageToStatus persists the AzureMachinePool image to the status.
func (m *MachinePoolScope) SaveVMImageToStatus(image *infrav1.Image) {
	m.AzureMachinePool.Status.Image = image
}

// RoleAssignmentSpecs returns the role assignment specs.
func (m *MachinePoolScope) RoleAssignmentSpecs(principalID *string) []azure.ResourceSpecGetter {
	if m.HasSystemAssignedIdentity() {
		return []azure.ResourceSpecGetter{
			&roleassignments.RoleAssignmentSpec{
				Name:             m.SystemAssignedIdentityName(),
				MachineName:      m.Name(),
				ResourceGroup:    m.NodeResourceGroup(),
				ResourceType:     azure.VirtualMachineScaleSet,
				Scope:            m.SystemAssignedIdentityScope(),
				RoleDefinitionID: m.SystemAssignedIdentityDefinitionID(),
				PrincipalID:      principalID,
				PrincipalType:    armauthorization.PrincipalTypeServicePrincipal,
			},
		}
	}
	return []azure.ResourceSpecGetter{}
}

// RoleAssignmentResourceType returns the role assignment resource type.
func (m *MachinePoolScope) RoleAssignmentResourceType() string {
	return azure.VirtualMachineScaleSet
}

// HasSystemAssignedIdentity returns true if the azure machine pool has system
// assigned identity.
func (m *MachinePoolScope) HasSystemAssignedIdentity() bool {
	return m.AzureMachinePool.Spec.Identity == infrav1.VMIdentitySystemAssigned
}
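// Sketch of the spec that feeds RoleAssignmentSpecs (IDs illustrative): with
// a system-assigned identity and a role scoped to the node resource group,
//
//	amp.Spec.Identity = infrav1.VMIdentitySystemAssigned
//	amp.Spec.SystemAssignedIdentityRole = &infrav1.SystemAssignedIdentityRole{
//		Name:         "<role assignment GUID>",
//		Scope:        "/subscriptions/123/resourceGroups/my-rg",
//		DefinitionID: "/subscriptions/123/providers/Microsoft.Authorization/roleDefinitions/<role definition GUID>",
//	}
//
// RoleAssignmentSpecs(principalID) then returns a single RoleAssignmentSpec
// for the VMSS that the roleassignments service submits to Azure.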
// VMSSExtensionSpecs returns the VMSS extension specs.
func (m *MachinePoolScope) VMSSExtensionSpecs() []azure.ResourceSpecGetter {
	var extensionSpecs = []azure.ResourceSpecGetter{}

	for _, extension := range m.AzureMachinePool.Spec.Template.VMExtensions {
		extensionSpecs = append(extensionSpecs, &scalesets.VMSSExtensionSpec{
			ExtensionSpec: azure.ExtensionSpec{
				Name:              extension.Name,
				VMName:            m.Name(),
				Publisher:         extension.Publisher,
				Version:           extension.Version,
				Settings:          extension.Settings,
				ProtectedSettings: extension.ProtectedSettings,
			},
			ResourceGroup: m.NodeResourceGroup(),
		})
	}

	cpuArchitectureType, _ := m.cache.VMSKU.GetCapability(resourceskus.CPUArchitectureType)
	bootstrapExtensionSpec := azure.GetBootstrappingVMExtension(m.AzureMachinePool.Spec.Template.OSDisk.OSType, m.CloudEnvironment(), m.Name(), cpuArchitectureType)

	if bootstrapExtensionSpec != nil {
		extensionSpecs = append(extensionSpecs, &scalesets.VMSSExtensionSpec{
			ExtensionSpec: *bootstrapExtensionSpec,
			ResourceGroup: m.NodeResourceGroup(),
		})
	}

	return extensionSpecs
}

func (m *MachinePoolScope) getDeploymentStrategy() machinepool.TypedDeleteSelector {
	if m.AzureMachinePool == nil {
		return nil
	}

	return machinepool.NewMachinePoolDeploymentStrategy(m.AzureMachinePool.Spec.Strategy)
}

// SetSubnetName defaults the AzureMachinePool subnet name to the name of the subnet with role 'node' when there is only one of them.
// Note: this logic exists only for purposes of ensuring backwards compatibility for old clusters created without the `subnetName` field being
// set, and should be removed in the future when this field is no longer optional.
func (m *MachinePoolScope) SetSubnetName() error {
	if m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName == "" {
		subnetName := ""
		for _, subnet := range m.NodeSubnets() {
			subnetName = subnet.Name
		}
		if len(m.NodeSubnets()) == 0 || len(m.NodeSubnets()) > 1 || subnetName == "" {
			return errors.New("a subnet name must be specified when zero or multiple subnets of role 'node' exist")
		}

		m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName = subnetName
	}

	return nil
}

// UpdateDeleteStatus updates a condition on the AzureMachinePool status after a DELETE operation.
func (m *MachinePoolScope) UpdateDeleteStatus(condition clusterv1.ConditionType, service string, err error) {
	switch {
	case err == nil:
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletedReason, clusterv1.ConditionSeverityInfo, "%s successfully deleted", service)
	case azure.IsOperationNotDoneError(err):
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletingReason, clusterv1.ConditionSeverityInfo, "%s deleting", service)
	default:
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletionFailedReason, clusterv1.ConditionSeverityError, "%s failed to delete. err: %s", service, err.Error())
	}
}
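// Typical call-site sketch for the DELETE helper above (service value
// illustrative): a service reports its outcome and the condition reflects
// one of three states.
//
//	err := scaleSetsService.Delete(ctx)
//	m.UpdateDeleteStatus(infrav1.ScaleSetRunningCondition, ScalesetsServiceName, err)
//	// err == nil                         -> DeletedReason (deletion finished)
//	// azure.IsOperationNotDoneError(err) -> DeletingReason (still in progress)
//	// any other error                    -> DeletionFailedReason with Severity=Error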
// UpdatePutStatus updates a condition on the AzureMachinePool status after a PUT operation.
func (m *MachinePoolScope) UpdatePutStatus(condition clusterv1.ConditionType, service string, err error) {
	switch {
	case err == nil:
		conditions.MarkTrue(m.AzureMachinePool, condition)
	case azure.IsOperationNotDoneError(err):
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.CreatingReason, clusterv1.ConditionSeverityInfo, "%s creating or updating", service)
	default:
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "%s failed to create or update. err: %s", service, err.Error())
	}
}

// UpdatePatchStatus updates a condition on the AzureMachinePool status after a PATCH operation.
func (m *MachinePoolScope) UpdatePatchStatus(condition clusterv1.ConditionType, service string, err error) {
	switch {
	case err == nil:
		conditions.MarkTrue(m.AzureMachinePool, condition)
	case azure.IsOperationNotDoneError(err):
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "%s updating", service)
	default:
		conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "%s failed to update. err: %s", service, err.Error())
	}
}

// PatchCAPIMachinePoolObject persists the CAPI MachinePool configuration and status.
func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error {
	return m.capiMachinePoolPatchHelper.Patch(
		ctx,
		m.MachinePool,
	)
}

// UpdateCAPIMachinePoolReplicas updates the associated MachinePool replica count.
func (m *MachinePoolScope) UpdateCAPIMachinePoolReplicas(ctx context.Context, replicas *int32) {
	m.MachinePool.Spec.Replicas = replicas
}

// HasReplicasExternallyManaged returns true if the externally managed annotation is set on the CAPI MachinePool resource.
func (m *MachinePoolScope) HasReplicasExternallyManaged(ctx context.Context) bool {
	return annotations.ReplicasManagedByExternalAutoscaler(m.MachinePool)
}

// ReconcileReplicas ensures MachinePool replicas match VMSS capacity if replicas are externally managed by an autoscaler.
func (m *MachinePoolScope) ReconcileReplicas(ctx context.Context, vmss *azure.VMSS) error {
	if !m.HasReplicasExternallyManaged(ctx) {
		return nil
	}

	var replicas int32
	if m.MachinePool.Spec.Replicas != nil {
		replicas = *m.MachinePool.Spec.Replicas
	}

	if capacity := int32(vmss.Capacity); capacity != replicas {
		m.UpdateCAPIMachinePoolReplicas(ctx, &capacity)
	}

	return nil
}
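// Behavior sketch for ReconcileReplicas (counts illustrative): when the CAPI
// MachinePool carries the "replicas managed by external autoscaler" annotation
// and the VMSS was scaled out-of-band from 3 to 5 instances,
//
//	vmss := &azure.VMSS{Capacity: 5}
//	_ = m.ReconcileReplicas(ctx, vmss) // MachinePool.Spec.Replicas becomes ptr.To[int32](5)
//
// the CAPI replica count follows the external autoscaler instead of driving it.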