sigs.k8s.io/cluster-api-provider-azure@v1.17.0/azure/scope/machinepool.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package scope 18 19 import ( 20 "context" 21 "crypto/sha256" 22 "encoding/base64" 23 "encoding/json" 24 "fmt" 25 "io" 26 "strings" 27 28 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2" 29 "github.com/pkg/errors" 30 corev1 "k8s.io/api/core/v1" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/klog/v2" 34 "k8s.io/utils/ptr" 35 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 36 "sigs.k8s.io/cluster-api-provider-azure/azure" 37 machinepool "sigs.k8s.io/cluster-api-provider-azure/azure/scope/strategies/machinepool_deployments" 38 "sigs.k8s.io/cluster-api-provider-azure/azure/services/resourceskus" 39 "sigs.k8s.io/cluster-api-provider-azure/azure/services/roleassignments" 40 "sigs.k8s.io/cluster-api-provider-azure/azure/services/scalesets" 41 "sigs.k8s.io/cluster-api-provider-azure/azure/services/virtualmachineimages" 42 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 43 azureutil "sigs.k8s.io/cluster-api-provider-azure/util/azure" 44 "sigs.k8s.io/cluster-api-provider-azure/util/futures" 45 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 46 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 47 capierrors "sigs.k8s.io/cluster-api/errors" 48 expv1 
	"sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/labels/format"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// ScalesetsServiceName is the name of the scalesets service.
// TODO: move this to scalesets.go once we remove the usage in this package,
// added here to avoid a circular dependency.
const ScalesetsServiceName = "scalesets"

type (
	// MachinePoolScopeParams defines the input parameters used to create a new MachinePoolScope.
	MachinePoolScopeParams struct {
		// Client is the controller-runtime client used for all API server reads and writes.
		Client client.Client
		// MachinePool is the CAPI MachinePool being reconciled.
		MachinePool *expv1.MachinePool
		// AzureMachinePool is the Azure-specific machine pool resource being reconciled.
		AzureMachinePool *infrav1exp.AzureMachinePool
		// ClusterScope provides cluster-level context (subscription, location, VNet, tags, ...).
		ClusterScope azure.ClusterScoper
		// Cache optionally seeds the scope's per-reconcile cache.
		Cache *MachinePoolCache
	}

	// MachinePoolScope defines a scope defined around a machine pool and its cluster.
	MachinePoolScope struct {
		azure.ClusterScoper
		AzureMachinePool *infrav1exp.AzureMachinePool
		MachinePool      *expv1.MachinePool
		client           client.Client
		// patchHelper patches the AzureMachinePool; capiMachinePoolPatchHelper patches the CAPI MachinePool.
		patchHelper                *patch.Helper
		capiMachinePoolPatchHelper *patch.Helper
		// vmssState holds the most recently observed VMSS state; nil until SetVMSSState is called.
		vmssState *azure.VMSS
		// cache is lazily populated by InitMachinePoolCache; nil during deletion flows.
		cache *MachinePoolCache
	}

	// NodeStatus represents the status of a Kubernetes node.
	NodeStatus struct {
		Ready   bool
		Version string
	}

	// MachinePoolCache stores common machine pool information so we don't have to hit the API multiple times within the same reconcile loop.
	MachinePoolCache struct {
		BootstrapData           string
		HasBootstrapDataChanges bool
		VMImage                 *infrav1.Image
		VMSKU                   resourceskus.SKU
		MaxSurge                int
	}
)

// NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters.
// This is meant to be called for each reconcile iteration.
103 func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) { 104 if params.Client == nil { 105 return nil, errors.New("client is required when creating a MachinePoolScope") 106 } 107 108 if params.MachinePool == nil { 109 return nil, errors.New("machine pool is required when creating a MachinePoolScope") 110 } 111 112 if params.AzureMachinePool == nil { 113 return nil, errors.New("azure machine pool is required when creating a MachinePoolScope") 114 } 115 116 helper, err := patch.NewHelper(params.AzureMachinePool, params.Client) 117 if err != nil { 118 return nil, errors.Wrap(err, "failed to init patch helper") 119 } 120 121 capiMachinePoolPatchHelper, err := patch.NewHelper(params.MachinePool, params.Client) 122 if err != nil { 123 return nil, errors.Wrap(err, "failed to init capi patch helper") 124 } 125 126 return &MachinePoolScope{ 127 client: params.Client, 128 MachinePool: params.MachinePool, 129 AzureMachinePool: params.AzureMachinePool, 130 patchHelper: helper, 131 capiMachinePoolPatchHelper: capiMachinePoolPatchHelper, 132 ClusterScoper: params.ClusterScope, 133 }, nil 134 } 135 136 // InitMachinePoolCache sets cached information about the machine pool to be used in the scope. 
137 func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error { 138 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.InitMachinePoolCache") 139 defer done() 140 141 if m.cache == nil { 142 var err error 143 m.cache = &MachinePoolCache{} 144 145 m.cache.BootstrapData, err = m.GetBootstrapData(ctx) 146 if err != nil { 147 return err 148 } 149 150 m.cache.HasBootstrapDataChanges, err = m.HasBootstrapDataChanges(ctx) 151 if err != nil { 152 return err 153 } 154 155 m.cache.VMImage, err = m.GetVMImage(ctx) 156 if err != nil { 157 return err 158 } 159 m.SaveVMImageToStatus(m.cache.VMImage) 160 161 m.cache.MaxSurge, err = m.MaxSurge() 162 if err != nil { 163 return err 164 } 165 166 skuCache, err := resourceskus.GetCache(m, m.Location()) 167 if err != nil { 168 return err 169 } 170 171 m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines) 172 if err != nil { 173 return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize) 174 } 175 } 176 177 return nil 178 } 179 180 // ScaleSetSpec returns the scale set spec. 
181 func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecGetter { 182 ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.ScaleSetSpec") 183 defer done() 184 185 spec := &scalesets.ScaleSetSpec{ 186 Name: m.Name(), 187 ResourceGroup: m.NodeResourceGroup(), 188 Size: m.AzureMachinePool.Spec.Template.VMSize, 189 Capacity: int64(ptr.Deref[int32](m.MachinePool.Spec.Replicas, 0)), 190 SSHKeyData: m.AzureMachinePool.Spec.Template.SSHPublicKey, 191 OSDisk: m.AzureMachinePool.Spec.Template.OSDisk, 192 DataDisks: m.AzureMachinePool.Spec.Template.DataDisks, 193 SubnetName: m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName, 194 VNetName: m.Vnet().Name, 195 VNetResourceGroup: m.Vnet().ResourceGroup, 196 PublicLBName: m.OutboundLBName(infrav1.Node), 197 PublicLBAddressPoolName: m.OutboundPoolName(infrav1.Node), 198 AcceleratedNetworking: m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].AcceleratedNetworking, 199 Identity: m.AzureMachinePool.Spec.Identity, 200 UserAssignedIdentities: m.AzureMachinePool.Spec.UserAssignedIdentities, 201 DiagnosticsProfile: m.AzureMachinePool.Spec.Template.Diagnostics, 202 SecurityProfile: m.AzureMachinePool.Spec.Template.SecurityProfile, 203 SpotVMOptions: m.AzureMachinePool.Spec.Template.SpotVMOptions, 204 FailureDomains: m.MachinePool.Spec.FailureDomains, 205 TerminateNotificationTimeout: m.AzureMachinePool.Spec.Template.TerminateNotificationTimeout, 206 NetworkInterfaces: m.AzureMachinePool.Spec.Template.NetworkInterfaces, 207 IPv6Enabled: m.IsIPv6Enabled(), 208 OrchestrationMode: m.AzureMachinePool.Spec.OrchestrationMode, 209 Location: m.AzureMachinePool.Spec.Location, 210 SubscriptionID: m.SubscriptionID(), 211 HasReplicasExternallyManaged: m.HasReplicasExternallyManaged(ctx), 212 ClusterName: m.ClusterName(), 213 AdditionalTags: m.AdditionalTags(), 214 PlatformFaultDomainCount: m.AzureMachinePool.Spec.PlatformFaultDomainCount, 215 ZoneBalance: 
m.AzureMachinePool.Spec.ZoneBalance, 216 } 217 218 if m.AzureMachinePool.Spec.ZoneBalance != nil && len(m.MachinePool.Spec.FailureDomains) <= 1 { 219 log.V(4).Info("zone balance is enabled but one or less failure domains are specified, zone balance will be disabled") 220 spec.ZoneBalance = nil 221 } 222 223 if m.cache != nil { 224 if m.HasReplicasExternallyManaged(ctx) { 225 spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges 226 log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData) 227 } 228 spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs() 229 spec.SKU = m.cache.VMSKU 230 spec.VMImage = m.cache.VMImage 231 spec.BootstrapData = m.cache.BootstrapData 232 spec.MaxSurge = m.cache.MaxSurge 233 } else { 234 log.V(4).Info("machinepool cache is nil, this is only expected when deleting a machinepool") 235 } 236 237 return spec 238 } 239 240 // Name returns the Azure Machine Pool Name. 241 func (m *MachinePoolScope) Name() string { 242 // Windows Machine pools names cannot be longer than 9 chars 243 if m.AzureMachinePool.Spec.Template.OSDisk.OSType == azure.WindowsOS && len(m.AzureMachinePool.Name) > 9 { 244 return "win-" + m.AzureMachinePool.Name[len(m.AzureMachinePool.Name)-5:] 245 } 246 return m.AzureMachinePool.Name 247 } 248 249 // SetInfrastructureMachineKind sets the infrastructure machine kind in the status if it is not set already, returning 250 // `true` if the status was updated. This supports MachinePool Machines. 251 func (m *MachinePoolScope) SetInfrastructureMachineKind() bool { 252 if m.AzureMachinePool.Status.InfrastructureMachineKind != infrav1exp.AzureMachinePoolMachineKind { 253 m.AzureMachinePool.Status.InfrastructureMachineKind = infrav1exp.AzureMachinePoolMachineKind 254 255 return true 256 } 257 258 return false 259 } 260 261 // ProviderID returns the AzureMachinePool ID by parsing Spec.ProviderID. 
262 func (m *MachinePoolScope) ProviderID() string { 263 resourceID, err := azureutil.ParseResourceID(m.AzureMachinePool.Spec.ProviderID) 264 if err != nil { 265 return "" 266 } 267 return resourceID.Name 268 } 269 270 // SetProviderID sets the AzureMachinePool providerID in spec. 271 func (m *MachinePoolScope) SetProviderID(v string) { 272 m.AzureMachinePool.Spec.ProviderID = v 273 } 274 275 // SystemAssignedIdentityName returns the scope for the system assigned identity. 276 func (m *MachinePoolScope) SystemAssignedIdentityName() string { 277 if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil { 278 return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.Name 279 } 280 return "" 281 } 282 283 // SystemAssignedIdentityScope returns the scope for the system assigned identity. 284 func (m *MachinePoolScope) SystemAssignedIdentityScope() string { 285 if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil { 286 return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.Scope 287 } 288 return "" 289 } 290 291 // SystemAssignedIdentityDefinitionID returns the role definition ID for the system assigned identity. 292 func (m *MachinePoolScope) SystemAssignedIdentityDefinitionID() string { 293 if m.AzureMachinePool.Spec.SystemAssignedIdentityRole != nil { 294 return m.AzureMachinePool.Spec.SystemAssignedIdentityRole.DefinitionID 295 } 296 return "" 297 } 298 299 // ProvisioningState returns the AzureMachinePool provisioning state. 300 func (m *MachinePoolScope) ProvisioningState() infrav1.ProvisioningState { 301 if m.AzureMachinePool.Status.ProvisioningState != nil { 302 return *m.AzureMachinePool.Status.ProvisioningState 303 } 304 return "" 305 } 306 307 // SetVMSSState updates the machine pool scope with the current state of the VMSS. 
308 func (m *MachinePoolScope) SetVMSSState(vmssState *azure.VMSS) { 309 m.vmssState = vmssState 310 } 311 312 // NeedsRequeue return true if any machines are not on the latest model or the VMSS is not in a terminal provisioning 313 // state. 314 func (m *MachinePoolScope) NeedsRequeue() bool { 315 state := m.AzureMachinePool.Status.ProvisioningState 316 if m.vmssState == nil { 317 return state != nil && infrav1.IsTerminalProvisioningState(*state) 318 } 319 320 if !m.vmssState.HasLatestModelAppliedToAll() { 321 return true 322 } 323 324 desiredMatchesActual := len(m.vmssState.Instances) == int(m.DesiredReplicas()) 325 return !(state != nil && infrav1.IsTerminalProvisioningState(*state) && desiredMatchesActual) 326 } 327 328 // DesiredReplicas returns the replica count on machine pool or 0 if machine pool replicas is nil. 329 func (m MachinePoolScope) DesiredReplicas() int32 { 330 return ptr.Deref[int32](m.MachinePool.Spec.Replicas, 0) 331 } 332 333 // MaxSurge returns the number of machines to surge, or 0 if the deployment strategy does not support surge. 334 func (m MachinePoolScope) MaxSurge() (int, error) { 335 if surger, ok := m.getDeploymentStrategy().(machinepool.Surger); ok { 336 surgeCount, err := surger.Surge(int(m.DesiredReplicas())) 337 if err != nil { 338 return 0, errors.Wrap(err, "failed to calculate surge for the machine pool") 339 } 340 341 return surgeCount, nil 342 } 343 344 return 0, nil 345 } 346 347 // updateReplicasAndProviderIDs ties the Azure VMSS instance data and the Node status data together to build and update 348 // the AzureMachinePool replica count and providerIDList. 
349 func (m *MachinePoolScope) updateReplicasAndProviderIDs(ctx context.Context) error { 350 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.UpdateInstanceStatuses") 351 defer done() 352 353 machines, err := m.GetMachinePoolMachines(ctx) 354 if err != nil { 355 return errors.Wrap(err, "failed to get machine pool machines") 356 } 357 358 var readyReplicas int32 359 providerIDs := make([]string, len(machines)) 360 for i, machine := range machines { 361 if machine.Status.Ready { 362 readyReplicas++ 363 } 364 providerIDs[i] = machine.Spec.ProviderID 365 } 366 367 m.AzureMachinePool.Status.Replicas = readyReplicas 368 m.AzureMachinePool.Spec.ProviderIDList = providerIDs 369 return nil 370 } 371 372 func (m *MachinePoolScope) getMachinePoolMachineLabels() map[string]string { 373 return map[string]string{ 374 clusterv1.ClusterNameLabel: m.ClusterName(), 375 infrav1exp.MachinePoolNameLabel: m.AzureMachinePool.Name, 376 clusterv1.MachinePoolNameLabel: format.MustFormatValue(m.MachinePool.Name), 377 m.ClusterName(): string(infrav1.ResourceLifecycleOwned), 378 } 379 } 380 381 // GetMachinePoolMachines returns the list of AzureMachinePoolMachines associated with this AzureMachinePool. 
382 func (m *MachinePoolScope) GetMachinePoolMachines(ctx context.Context) ([]infrav1exp.AzureMachinePoolMachine, error) { 383 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.getMachinePoolMachines") 384 defer done() 385 386 labels := m.getMachinePoolMachineLabels() 387 ampml := &infrav1exp.AzureMachinePoolMachineList{} 388 if err := m.client.List(ctx, ampml, client.InNamespace(m.AzureMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { 389 return nil, errors.Wrap(err, "failed to list AzureMachinePoolMachines") 390 } 391 392 return ampml.Items, nil 393 } 394 395 func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) error { 396 ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.applyAzureMachinePoolMachines") 397 defer done() 398 399 if m.vmssState == nil { 400 return nil 401 } 402 403 ampms, err := m.GetMachinePoolMachines(ctx) 404 if err != nil { 405 return err 406 } 407 408 existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms)) 409 for _, ampm := range ampms { 410 machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta) 411 if err != nil { 412 return fmt.Errorf("failed to find owner machine for %s/%s: %w", ampm.Namespace, ampm.Name, err) 413 } 414 415 if _, ampmHasDeleteAnnotation := ampm.Annotations[clusterv1.DeleteMachineAnnotation]; !ampmHasDeleteAnnotation { 416 // fetch Machine delete annotation from owner machine to AzureMachinePoolMachine. 417 // This ensures setting a deleteMachine annotation on the Machine has an effect on the AzureMachinePoolMachine 418 // and the deployment strategy, in case the automatic propagation of the annotation from Machine to AzureMachinePoolMachine 419 // hasn't been done yet. 
420 if machine != nil && machine.Annotations != nil { 421 if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation { 422 log.V(4).Info("fetched DeleteMachineAnnotation", "machine", ampm.Spec.ProviderID) 423 if ampm.Annotations == nil { 424 ampm.Annotations = make(map[string]string) 425 } 426 ampm.Annotations[clusterv1.DeleteMachineAnnotation] = machine.Annotations[clusterv1.DeleteMachineAnnotation] 427 } 428 } 429 } else { 430 log.V(4).Info("DeleteMachineAnnotation already set") 431 } 432 433 existingMachinesByProviderID[ampm.Spec.ProviderID] = ampm 434 } 435 436 // determine which machines need to be created to reflect the current state in Azure 437 azureMachinesByProviderID := m.vmssState.InstancesByProviderID(m.AzureMachinePool.Spec.OrchestrationMode) 438 for key, val := range azureMachinesByProviderID { 439 if val.State == infrav1.Deleting || val.State == infrav1.Deleted { 440 log.V(4).Info("not recreating AzureMachinePoolMachine because VMSS VM is deleting", "providerID", key) 441 continue 442 } 443 if _, ok := existingMachinesByProviderID[key]; !ok { 444 log.V(4).Info("creating AzureMachinePoolMachine", "providerID", key) 445 if err := m.createMachine(ctx, val); err != nil { 446 return errors.Wrap(err, "failed creating AzureMachinePoolMachine") 447 } 448 continue 449 } 450 } 451 452 deleted := false 453 // Delete MachinePool Machines for instances that no longer exist in Azure, i.e. 
deleted out-of-band 454 for key, ampm := range existingMachinesByProviderID { 455 ampm := ampm 456 if _, ok := azureMachinesByProviderID[key]; !ok { 457 deleted = true 458 log.V(4).Info("deleting AzureMachinePoolMachine because it no longer exists in the VMSS", "providerID", key) 459 delete(existingMachinesByProviderID, key) 460 if err := m.DeleteMachine(ctx, ampm); err != nil { 461 return errors.Wrap(err, "failed deleting AzureMachinePoolMachine no longer existing in Azure") 462 } 463 } 464 } 465 466 if deleted { 467 log.V(4).Info("exiting early due to finding AzureMachinePoolMachine(s) that were deleted because they no longer exist in the VMSS") 468 // exit early to be less greedy about delete 469 return nil 470 } 471 472 if futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.PatchFuture) || 473 futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.PutFuture) || 474 futures.Has(m.AzureMachinePool, m.Name(), ScalesetsServiceName, infrav1.DeleteFuture) { 475 log.V(4).Info("exiting early due an in-progress long running operation on the ScaleSet") 476 // exit early to be less greedy about delete 477 return nil 478 } 479 480 // when replicas are externally managed, we do not want to scale down manually since that is handled by the external scaler. 
481 if m.HasReplicasExternallyManaged(ctx) { 482 log.V(4).Info("exiting early due to replicas externally managed") 483 return nil 484 } 485 486 deleteSelector := m.getDeploymentStrategy() 487 if deleteSelector == nil { 488 log.V(4).Info("can not select AzureMachinePoolMachines to delete because no deployment strategy is specified") 489 return nil 490 } 491 492 // Select Machines to delete to lower the replica count 493 toDelete, err := deleteSelector.SelectMachinesToDelete(ctx, m.DesiredReplicas(), existingMachinesByProviderID) 494 if err != nil { 495 return errors.Wrap(err, "failed selecting AzureMachinePoolMachine(s) to delete") 496 } 497 498 // Delete MachinePool Machines as a part of scaling down 499 for i := range toDelete { 500 ampm := toDelete[i] 501 log.Info("deleting selected AzureMachinePoolMachine", "providerID", ampm.Spec.ProviderID) 502 if err := m.DeleteMachine(ctx, ampm); err != nil { 503 return errors.Wrap(err, "failed deleting AzureMachinePoolMachine to reduce replica count") 504 } 505 } 506 507 log.V(4).Info("done reconciling AzureMachinePoolMachine(s)") 508 return nil 509 } 510 511 func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSSVM) error { 512 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.createMachine") 513 defer done() 514 515 parsed, err := azureutil.ParseResourceID(machine.ID) 516 if err != nil { 517 return errors.Wrap(err, fmt.Sprintf("failed to parse resource id %q", machine.ID)) 518 } 519 instanceID := strings.ReplaceAll(parsed.Name, "_", "-") 520 521 ampm := infrav1exp.AzureMachinePoolMachine{ 522 ObjectMeta: metav1.ObjectMeta{ 523 Name: m.AzureMachinePool.Name + "-" + instanceID, 524 Namespace: m.AzureMachinePool.Namespace, 525 OwnerReferences: []metav1.OwnerReference{ 526 { 527 APIVersion: infrav1exp.GroupVersion.String(), 528 Kind: infrav1.AzureMachinePoolKind, 529 Name: m.AzureMachinePool.Name, 530 BlockOwnerDeletion: ptr.To(true), 531 UID: m.AzureMachinePool.UID, 532 }, 533 }, 
534 Annotations: map[string]string{}, 535 }, 536 Spec: infrav1exp.AzureMachinePoolMachineSpec{ 537 ProviderID: machine.ProviderID(), 538 InstanceID: machine.InstanceID, 539 }, 540 } 541 542 labels := m.getMachinePoolMachineLabels() 543 ampm.Labels = labels 544 545 controllerutil.AddFinalizer(&m, infrav1exp.AzureMachinePoolMachineFinalizer) 546 conditions.MarkFalse(&m, infrav1.VMRunningCondition, string(infrav1.Creating), clusterv1.ConditionSeverityInfo, "") 547 if err := m.client.Create(ctx, &m); err != nil { 548 return errors.Wrapf(err, "failed creating AzureMachinePoolMachine %s in AzureMachinePool %s", machine.ID, m.AzureMachinePool.Name) 549 } 550 551 return nil 552 } 553 554 // DeleteMachine deletes an AzureMachinePoolMachine by fetching its owner Machine and deleting it. This ensures that the node cordon/drain happens before deleting the infrastructure. 555 func (m *MachinePoolScope) DeleteMachine(ctx context.Context, ampm infrav1exp.AzureMachinePoolMachine) error { 556 ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.DeleteMachine") 557 defer done() 558 559 machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta) 560 if err != nil { 561 return errors.Wrapf(err, "error getting owner Machine for AzureMachinePoolMachine %s/%s", ampm.Namespace, ampm.Name) 562 } 563 if machine == nil { 564 log.V(2).Info("No owner Machine exists for AzureMachinePoolMachine", "ampm", klog.KObj(&m)) 565 // If the AzureMachinePoolMachine does not have an owner Machine, do not attempt to delete the AzureMachinePoolMachine as the MachinePool controller will create the 566 // Machine and we want to let it catch up. If we are too hasty to delete, that introduces a race condition where the AzureMachinePoolMachine could be deleted 567 // just as the Machine comes online. 
568 569 // In the case where the MachinePool is being deleted and the Machine will never come online, the AzureMachinePoolMachine will be deleted via its ownerRef to the 570 // AzureMachinePool, so that is covered as well. 571 572 return nil 573 } 574 575 if err := m.client.Delete(ctx, machine); err != nil { 576 return errors.Wrapf(err, "failed to delete Machine %s for AzureMachinePoolMachine %s in MachinePool %s", machine.Name, ampm.Name, m.MachinePool.Name) 577 } 578 579 return nil 580 } 581 582 // SetLongRunningOperationState will set the future on the AzureMachinePool status to allow the resource to continue 583 // in the next reconciliation. 584 func (m *MachinePoolScope) SetLongRunningOperationState(future *infrav1.Future) { 585 futures.Set(m.AzureMachinePool, future) 586 } 587 588 // GetLongRunningOperationState will get the future on the AzureMachinePool status. 589 func (m *MachinePoolScope) GetLongRunningOperationState(name, service, futureType string) *infrav1.Future { 590 return futures.Get(m.AzureMachinePool, name, service, futureType) 591 } 592 593 // DeleteLongRunningOperationState will delete the future from the AzureMachinePool status. 594 func (m *MachinePoolScope) DeleteLongRunningOperationState(name, service, futureType string) { 595 futures.Delete(m.AzureMachinePool, name, service, futureType) 596 } 597 598 // setProvisioningStateAndConditions sets the AzureMachinePool provisioning state and conditions. 
599 func (m *MachinePoolScope) setProvisioningStateAndConditions(v infrav1.ProvisioningState) { 600 m.AzureMachinePool.Status.ProvisioningState = &v 601 switch { 602 case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas == m.AzureMachinePool.Status.Replicas: 603 // vmss is provisioned with enough ready replicas 604 conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetRunningCondition) 605 conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetModelUpdatedCondition) 606 conditions.MarkTrue(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition) 607 m.SetReady() 608 case v == infrav1.Succeeded && *m.MachinePool.Spec.Replicas != m.AzureMachinePool.Status.Replicas: 609 // not enough ready or too many ready replicas we must still be scaling up or down 610 updatingState := infrav1.Updating 611 m.AzureMachinePool.Status.ProvisioningState = &updatingState 612 if *m.MachinePool.Spec.Replicas > m.AzureMachinePool.Status.Replicas { 613 conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition, infrav1.ScaleSetScaleUpReason, clusterv1.ConditionSeverityInfo, "") 614 } else { 615 conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetDesiredReplicasCondition, infrav1.ScaleSetScaleDownReason, clusterv1.ConditionSeverityInfo, "") 616 } 617 m.SetNotReady() 618 case v == infrav1.Updating: 619 conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetModelUpdatedCondition, infrav1.ScaleSetModelOutOfDateReason, clusterv1.ConditionSeverityInfo, "") 620 m.SetNotReady() 621 case v == infrav1.Creating: 622 conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetCreatingReason, clusterv1.ConditionSeverityInfo, "") 623 m.SetNotReady() 624 case v == infrav1.Deleting: 625 conditions.MarkFalse(m.AzureMachinePool, infrav1.ScaleSetRunningCondition, infrav1.ScaleSetDeletingReason, clusterv1.ConditionSeverityInfo, "") 626 m.SetNotReady() 627 default: 628 conditions.MarkFalse(m.AzureMachinePool, 
infrav1.ScaleSetRunningCondition, string(v), clusterv1.ConditionSeverityInfo, "") 629 m.SetNotReady() 630 } 631 } 632 633 // SetReady sets the AzureMachinePool Ready Status to true. 634 func (m *MachinePoolScope) SetReady() { 635 m.AzureMachinePool.Status.Ready = true 636 } 637 638 // SetNotReady sets the AzureMachinePool Ready Status to false. 639 func (m *MachinePoolScope) SetNotReady() { 640 m.AzureMachinePool.Status.Ready = false 641 } 642 643 // SetFailureMessage sets the AzureMachinePool status failure message. 644 func (m *MachinePoolScope) SetFailureMessage(v error) { 645 m.AzureMachinePool.Status.FailureMessage = ptr.To(v.Error()) 646 } 647 648 // SetFailureReason sets the AzureMachinePool status failure reason. 649 func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) { 650 m.AzureMachinePool.Status.FailureReason = &v 651 } 652 653 // AdditionalTags merges AdditionalTags from the scope's AzureCluster and AzureMachinePool. If the same key is present in both, 654 // the value from AzureMachinePool takes precedence. 655 func (m *MachinePoolScope) AdditionalTags() infrav1.Tags { 656 tags := make(infrav1.Tags) 657 // Start with the cluster-wide tags... 658 tags.Merge(m.ClusterScoper.AdditionalTags()) 659 // ... and merge in the Machine Pool's 660 tags.Merge(m.AzureMachinePool.Spec.AdditionalTags) 661 // Set the cloud provider tag 662 tags[infrav1.ClusterAzureCloudProviderTagKey(m.ClusterName())] = string(infrav1.ResourceLifecycleOwned) 663 664 return tags 665 } 666 667 // SetAnnotation sets a key value annotation on the AzureMachinePool. 668 func (m *MachinePoolScope) SetAnnotation(key, value string) { 669 if m.AzureMachinePool.Annotations == nil { 670 m.AzureMachinePool.Annotations = map[string]string{} 671 } 672 m.AzureMachinePool.Annotations[key] = value 673 } 674 675 // PatchObject persists the AzureMachinePool spec and status. 
676 func (m *MachinePoolScope) PatchObject(ctx context.Context) error { 677 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.PatchObject") 678 defer done() 679 680 conditions.SetSummary(m.AzureMachinePool) 681 return m.patchHelper.Patch( 682 ctx, 683 m.AzureMachinePool, 684 patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ 685 clusterv1.ReadyCondition, 686 infrav1.BootstrapSucceededCondition, 687 infrav1.ScaleSetDesiredReplicasCondition, 688 infrav1.ScaleSetModelUpdatedCondition, 689 infrav1.ScaleSetRunningCondition, 690 }}) 691 } 692 693 // Close the MachinePoolScope by updating the AzureMachinePool spec and AzureMachinePool status. 694 func (m *MachinePoolScope) Close(ctx context.Context) error { 695 ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.Close") 696 defer done() 697 698 if m.vmssState != nil { 699 if err := m.applyAzureMachinePoolMachines(ctx); err != nil { 700 log.Error(err, "failed to apply changes to the AzureMachinePoolMachines") 701 return errors.Wrap(err, "failed to apply changes to AzureMachinePoolMachines") 702 } 703 704 m.setProvisioningStateAndConditions(m.vmssState.State) 705 if err := m.updateReplicasAndProviderIDs(ctx); err != nil { 706 return errors.Wrap(err, "failed to update replicas and providerIDs") 707 } 708 if m.HasReplicasExternallyManaged(ctx) { 709 if err := m.updateCustomDataHash(ctx); err != nil { 710 // ignore errors to calculating the custom data hash since it's not absolutely crucial. 711 log.V(4).Error(err, "unable to update custom data hash, ignoring.") 712 } 713 } 714 } 715 716 if err := m.PatchObject(ctx); err != nil { 717 return errors.Wrap(err, "unable to patch AzureMachinePool") 718 } 719 if err := m.PatchCAPIMachinePoolObject(ctx); err != nil { 720 return errors.Wrap(err, "unable to patch CAPI MachinePool") 721 } 722 return nil 723 } 724 725 // GetBootstrapData returns the bootstrap data from the secret in the MachinePool's bootstrap.dataSecretName. 
726 func (m *MachinePoolScope) GetBootstrapData(ctx context.Context) (string, error) { 727 ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetBootstrapData") 728 defer done() 729 730 dataSecretName := m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName 731 if dataSecretName == nil { 732 return "", errors.New("error retrieving bootstrap data: linked MachinePool Spec's bootstrap.dataSecretName is nil") 733 } 734 secret := &corev1.Secret{} 735 key := types.NamespacedName{Namespace: m.AzureMachinePool.Namespace, Name: *dataSecretName} 736 if err := m.client.Get(ctx, key, secret); err != nil { 737 return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AzureMachinePool %s/%s", m.AzureMachinePool.Namespace, m.Name()) 738 } 739 740 value, ok := secret.Data["value"] 741 if !ok { 742 return "", errors.New("error retrieving bootstrap data: secret value key is missing") 743 } 744 return base64.StdEncoding.EncodeToString(value), nil 745 } 746 747 // calculateBootstrapDataHash calculates the sha256 hash of the bootstrap data. 748 func (m *MachinePoolScope) calculateBootstrapDataHash(_ context.Context) (string, error) { 749 bootstrapData := m.cache.BootstrapData 750 h := sha256.New() 751 n, err := io.WriteString(h, bootstrapData) 752 if err != nil || n == 0 { 753 return "", fmt.Errorf("unable to write custom data (bytes written: %q): %w", n, err) 754 } 755 return fmt.Sprintf("%x", h.Sum(nil)), nil 756 } 757 758 // HasBootstrapDataChanges calculates the sha256 hash of the bootstrap data and compares it with the saved hash in AzureMachinePool.Status. 
759 func (m *MachinePoolScope) HasBootstrapDataChanges(ctx context.Context) (bool, error) { 760 newHash, err := m.calculateBootstrapDataHash(ctx) 761 if err != nil { 762 return false, err 763 } 764 return m.AzureMachinePool.GetAnnotations()[azure.CustomDataHashAnnotation] != newHash, nil 765 } 766 767 // updateCustomDataHash calculates the sha256 hash of the bootstrap data and saves it in AzureMachinePool.Status. 768 func (m *MachinePoolScope) updateCustomDataHash(ctx context.Context) error { 769 newHash, err := m.calculateBootstrapDataHash(ctx) 770 if err != nil { 771 return err 772 } 773 m.SetAnnotation(azure.CustomDataHashAnnotation, newHash) 774 return nil 775 } 776 777 // GetVMImage picks an image from the AzureMachinePool configuration, or uses a default one. 778 func (m *MachinePoolScope) GetVMImage(ctx context.Context) (*infrav1.Image, error) { 779 ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.GetVMImage") 780 defer done() 781 782 // Use custom Marketplace image, Image ID or a Shared Image Gallery image if provided 783 if m.AzureMachinePool.Spec.Template.Image != nil { 784 return m.AzureMachinePool.Spec.Template.Image, nil 785 } 786 787 var ( 788 err error 789 defaultImage *infrav1.Image 790 ) 791 792 svc, err := virtualmachineimages.New(m) 793 if err != nil { 794 return nil, errors.Wrap(err, "failed to create virtualmachineimages service") 795 } 796 797 if m.AzureMachinePool.Spec.Template.OSDisk.OSType == azure.WindowsOS { 798 runtime := m.AzureMachinePool.Annotations["runtime"] 799 windowsServerVersion := m.AzureMachinePool.Annotations["windowsServerVersion"] 800 log.V(4).Info("No image specified for machine, using default Windows Image", "machine", m.MachinePool.GetName(), "runtime", runtime, "windowsServerVersion", windowsServerVersion) 801 defaultImage, err = svc.GetDefaultWindowsImage(ctx, m.Location(), ptr.Deref(m.MachinePool.Spec.Template.Spec.Version, ""), runtime, windowsServerVersion) 802 } else { 803 defaultImage, err = 
svc.GetDefaultUbuntuImage(ctx, m.Location(), ptr.Deref(m.MachinePool.Spec.Template.Spec.Version, "")) 804 } 805 806 if err != nil { 807 return defaultImage, errors.Wrap(err, "failed to get default OS image") 808 } 809 810 return defaultImage, nil 811 } 812 813 // SaveVMImageToStatus persists the AzureMachinePool image to the status. 814 func (m *MachinePoolScope) SaveVMImageToStatus(image *infrav1.Image) { 815 m.AzureMachinePool.Status.Image = image 816 } 817 818 // RoleAssignmentSpecs returns the role assignment specs. 819 func (m *MachinePoolScope) RoleAssignmentSpecs(principalID *string) []azure.ResourceSpecGetter { 820 roles := make([]azure.ResourceSpecGetter, 1) 821 if m.HasSystemAssignedIdentity() { 822 roles[0] = &roleassignments.RoleAssignmentSpec{ 823 Name: m.SystemAssignedIdentityName(), 824 MachineName: m.Name(), 825 ResourceGroup: m.NodeResourceGroup(), 826 ResourceType: azure.VirtualMachineScaleSet, 827 Scope: m.SystemAssignedIdentityScope(), 828 RoleDefinitionID: m.SystemAssignedIdentityDefinitionID(), 829 PrincipalID: principalID, 830 PrincipalType: armauthorization.PrincipalTypeServicePrincipal, 831 } 832 return roles 833 } 834 return []azure.ResourceSpecGetter{} 835 } 836 837 // RoleAssignmentResourceType returns the role assignment resource type. 838 func (m *MachinePoolScope) RoleAssignmentResourceType() string { 839 return azure.VirtualMachineScaleSet 840 } 841 842 // HasSystemAssignedIdentity returns true if the azure machine pool has system 843 // assigned identity. 844 func (m *MachinePoolScope) HasSystemAssignedIdentity() bool { 845 return m.AzureMachinePool.Spec.Identity == infrav1.VMIdentitySystemAssigned 846 } 847 848 // VMSSExtensionSpecs returns the VMSS extension specs. 
849 func (m *MachinePoolScope) VMSSExtensionSpecs() []azure.ResourceSpecGetter { 850 var extensionSpecs = []azure.ResourceSpecGetter{} 851 852 for _, extension := range m.AzureMachinePool.Spec.Template.VMExtensions { 853 extensionSpecs = append(extensionSpecs, &scalesets.VMSSExtensionSpec{ 854 ExtensionSpec: azure.ExtensionSpec{ 855 Name: extension.Name, 856 VMName: m.Name(), 857 Publisher: extension.Publisher, 858 Version: extension.Version, 859 Settings: extension.Settings, 860 ProtectedSettings: extension.ProtectedSettings, 861 }, 862 ResourceGroup: m.NodeResourceGroup(), 863 }) 864 } 865 866 cpuArchitectureType, _ := m.cache.VMSKU.GetCapability(resourceskus.CPUArchitectureType) 867 bootstrapExtensionSpec := azure.GetBootstrappingVMExtension(m.AzureMachinePool.Spec.Template.OSDisk.OSType, m.CloudEnvironment(), m.Name(), cpuArchitectureType) 868 869 if bootstrapExtensionSpec != nil { 870 extensionSpecs = append(extensionSpecs, &scalesets.VMSSExtensionSpec{ 871 ExtensionSpec: *bootstrapExtensionSpec, 872 ResourceGroup: m.NodeResourceGroup(), 873 }) 874 } 875 876 return extensionSpecs 877 } 878 879 func (m *MachinePoolScope) getDeploymentStrategy() machinepool.TypedDeleteSelector { 880 if m.AzureMachinePool == nil { 881 return nil 882 } 883 884 return machinepool.NewMachinePoolDeploymentStrategy(m.AzureMachinePool.Spec.Strategy) 885 } 886 887 // SetSubnetName defaults the AzureMachinePool subnet name to the name of the subnet with role 'node' when there is only one of them. 888 // Note: this logic exists only for purposes of ensuring backwards compatibility for old clusters created without the `subnetName` field being 889 // set, and should be removed in the future when this field is no longer optional. 
890 func (m *MachinePoolScope) SetSubnetName() error { 891 if m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName == "" { 892 subnetName := "" 893 for _, subnet := range m.NodeSubnets() { 894 subnetName = subnet.Name 895 } 896 if len(m.NodeSubnets()) == 0 || len(m.NodeSubnets()) > 1 || subnetName == "" { 897 return errors.New("a subnet name must be specified when no subnets are specified or more than 1 subnet of role 'node' exist") 898 } 899 900 m.AzureMachinePool.Spec.Template.NetworkInterfaces[0].SubnetName = subnetName 901 } 902 903 return nil 904 } 905 906 // UpdateDeleteStatus updates a condition on the AzureMachinePool status after a DELETE operation. 907 func (m *MachinePoolScope) UpdateDeleteStatus(condition clusterv1.ConditionType, service string, err error) { 908 switch { 909 case err == nil: 910 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletedReason, clusterv1.ConditionSeverityInfo, "%s successfully deleted", service) 911 case azure.IsOperationNotDoneError(err): 912 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletingReason, clusterv1.ConditionSeverityInfo, "%s deleting", service) 913 default: 914 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.DeletionFailedReason, clusterv1.ConditionSeverityError, "%s failed to delete. err: %s", service, err.Error()) 915 } 916 } 917 918 // UpdatePutStatus updates a condition on the AzureMachinePool status after a PUT operation. 
919 func (m *MachinePoolScope) UpdatePutStatus(condition clusterv1.ConditionType, service string, err error) { 920 switch { 921 case err == nil: 922 conditions.MarkTrue(m.AzureMachinePool, condition) 923 case azure.IsOperationNotDoneError(err): 924 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.CreatingReason, clusterv1.ConditionSeverityInfo, "%s creating or updating", service) 925 default: 926 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "%s failed to create or update. err: %s", service, err.Error()) 927 } 928 } 929 930 // UpdatePatchStatus updates a condition on the AzureMachinePool status after a PATCH operation. 931 func (m *MachinePoolScope) UpdatePatchStatus(condition clusterv1.ConditionType, service string, err error) { 932 switch { 933 case err == nil: 934 conditions.MarkTrue(m.AzureMachinePool, condition) 935 case azure.IsOperationNotDoneError(err): 936 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "%s updating", service) 937 default: 938 conditions.MarkFalse(m.AzureMachinePool, condition, infrav1.FailedReason, clusterv1.ConditionSeverityError, "%s failed to update. err: %s", service, err.Error()) 939 } 940 } 941 942 // PatchCAPIMachinePoolObject persists the capi machinepool configuration and status. 943 func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error { 944 return m.capiMachinePoolPatchHelper.Patch( 945 ctx, 946 m.MachinePool, 947 ) 948 } 949 950 // UpdateCAPIMachinePoolReplicas updates the associated MachinePool replica count. 951 func (m *MachinePoolScope) UpdateCAPIMachinePoolReplicas(ctx context.Context, replicas *int32) { 952 m.MachinePool.Spec.Replicas = replicas 953 } 954 955 // HasReplicasExternallyManaged returns true if the externally managed annotation is set on the CAPI MachinePool resource. 
956 func (m *MachinePoolScope) HasReplicasExternallyManaged(ctx context.Context) bool { 957 return annotations.ReplicasManagedByExternalAutoscaler(m.MachinePool) 958 } 959 960 // ReconcileReplicas ensures MachinePool replicas match VMSS capacity if replicas are externally managed by an autoscaler. 961 func (m *MachinePoolScope) ReconcileReplicas(ctx context.Context, vmss *azure.VMSS) error { 962 if !m.HasReplicasExternallyManaged(ctx) { 963 return nil 964 } 965 966 var replicas int32 = 0 967 if m.MachinePool.Spec.Replicas != nil { 968 replicas = *m.MachinePool.Spec.Replicas 969 } 970 971 if capacity := int32(vmss.Capacity); capacity != replicas { 972 m.UpdateCAPIMachinePoolReplicas(ctx, &capacity) 973 } 974 975 return nil 976 } 977 978 // AnnotationJSON returns a map[string]interface from a JSON annotation. 979 func (m *MachinePoolScope) AnnotationJSON(annotation string) (map[string]interface{}, error) { 980 out := map[string]interface{}{} 981 jsonAnnotation := m.AzureMachinePool.GetAnnotations()[annotation] 982 if jsonAnnotation == "" { 983 return out, nil 984 } 985 err := json.Unmarshal([]byte(jsonAnnotation), &out) 986 if err != nil { 987 return out, err 988 } 989 return out, nil 990 } 991 992 // UpdateAnnotationJSON updates the `annotation` with 993 // `content`. `content` in this case should be a `map[string]interface{}` 994 // suitable for turning into JSON. This `content` map will be marshalled into a 995 // JSON string before being set as the given `annotation`. 996 func (m *MachinePoolScope) UpdateAnnotationJSON(annotation string, content map[string]interface{}) error { 997 b, err := json.Marshal(content) 998 if err != nil { 999 return err 1000 } 1001 m.SetAnnotation(annotation, string(b)) 1002 return nil 1003 } 1004 1005 // TagsSpecs returns the tags for the AzureMachinePool. 
1006 func (m *MachinePoolScope) TagsSpecs() []azure.TagsSpec { 1007 return []azure.TagsSpec{ 1008 { 1009 Scope: azure.VMSSID(m.SubscriptionID(), m.NodeResourceGroup(), m.Name()), 1010 Tags: m.AdditionalTags(), 1011 Annotation: azure.VMSSTagsLastAppliedAnnotation, 1012 }, 1013 } 1014 }