sigs.k8s.io/cluster-api-provider-azure@v1.17.0/exp/controllers/azuremachinepool_controller.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controllers 18 19 import ( 20 "context" 21 "fmt" 22 "reflect" 23 "time" 24 25 "github.com/pkg/errors" 26 corev1 "k8s.io/api/core/v1" 27 apierrors "k8s.io/apimachinery/pkg/api/errors" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/runtime" 30 "k8s.io/apimachinery/pkg/runtime/schema" 31 kerrors "k8s.io/apimachinery/pkg/util/errors" 32 "k8s.io/client-go/tools/record" 33 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 34 "sigs.k8s.io/cluster-api-provider-azure/azure" 35 "sigs.k8s.io/cluster-api-provider-azure/azure/scope" 36 infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers" 37 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 38 "sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing" 39 "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" 40 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 41 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 42 kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" 43 capierrors "sigs.k8s.io/cluster-api/errors" 44 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 45 "sigs.k8s.io/cluster-api/util" 46 "sigs.k8s.io/cluster-api/util/annotations" 47 "sigs.k8s.io/cluster-api/util/predicates" 48 ctrl "sigs.k8s.io/controller-runtime" 49 "sigs.k8s.io/controller-runtime/pkg/builder" 50 "sigs.k8s.io/controller-runtime/pkg/client" 51 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 52 "sigs.k8s.io/controller-runtime/pkg/handler" 53 "sigs.k8s.io/controller-runtime/pkg/predicate" 54 "sigs.k8s.io/controller-runtime/pkg/reconcile" 55 ) 56 57 type ( 58 // AzureMachinePoolReconciler reconciles an AzureMachinePool object. 59 AzureMachinePoolReconciler struct { 60 client.Client 61 Scheme *runtime.Scheme 62 Recorder record.EventRecorder 63 Timeouts reconciler.Timeouts 64 WatchFilterValue string 65 createAzureMachinePoolService azureMachinePoolServiceCreator 66 BootstrapConfigGVK schema.GroupVersionKind 67 } 68 69 // annotationReaderWriter provides an interface to read and write annotations. 70 annotationReaderWriter interface { 71 GetAnnotations() map[string]string 72 SetAnnotations(annotations map[string]string) 73 } 74 ) 75 76 type azureMachinePoolServiceCreator func(machinePoolScope *scope.MachinePoolScope) (*azureMachinePoolService, error) 77 78 // NewAzureMachinePoolReconciler returns a new AzureMachinePoolReconciler instance. 79 func NewAzureMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue, bootstrapConfigGVK string) *AzureMachinePoolReconciler { 80 gvk := schema.FromAPIVersionAndKind(kubeadmv1.GroupVersion.String(), reflect.TypeOf((*kubeadmv1.KubeadmConfig)(nil)).Elem().Name()) 81 userGVK, _ := schema.ParseKindArg(bootstrapConfigGVK) 82 83 if userGVK != nil { 84 gvk = *userGVK 85 } 86 87 ampr := &AzureMachinePoolReconciler{ 88 Client: client, 89 Recorder: recorder, 90 Timeouts: timeouts, 91 WatchFilterValue: watchFilterValue, 92 BootstrapConfigGVK: gvk, 93 } 94 95 ampr.createAzureMachinePoolService = newAzureMachinePoolService 96 97 return ampr 98 } 99 100 // SetupWithManager initializes this controller with a manager. 101 func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error { 102 ctx, log, done := tele.StartSpanWithLogger(ctx, 103 "controllers.AzureMachinePoolReconciler.SetupWithManager", 104 tele.KVP("controller", "AzureMachinePool"), 105 ) 106 defer done() 107 108 var r reconcile.Reconciler = ampr 109 if options.Cache != nil { 110 r = coalescing.NewReconciler(ampr, options.Cache, log) 111 } 112 113 // create mappers to transform incoming AzureClusters and AzureManagedClusters into AzureMachinePool requests 114 azureClusterMapper, err := AzureClusterToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log) 115 if err != nil { 116 return errors.Wrapf(err, "failed to create AzureCluster to AzureMachinePools mapper") 117 } 118 azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log) 119 if err != nil { 120 return errors.Wrapf(err, "failed to create AzureManagedCluster to AzureMachinePools mapper") 121 } 122 123 azureMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ampr.Client, &infrav1exp.AzureMachinePoolList{}, mgr.GetScheme()) 124 if err != nil { 125 return errors.Wrap(err, "failed to create mapper for Cluster to AzureMachines") 126 } 127 128 config := &metav1.PartialObjectMetadata{} 129 config.SetGroupVersionKind(ampr.BootstrapConfigGVK) 130 return ctrl.NewControllerManagedBy(mgr). 131 WithOptions(options.Options). 132 For(&infrav1exp.AzureMachinePool{}). 133 WithEventFilter(predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue)). 134 // watch for changes in CAPI MachinePool resources 135 Watches( 136 &expv1.MachinePool{}, 137 handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind(infrav1.AzureMachinePoolKind), log)), 138 ). 139 // watch for changes in AzureCluster resources 140 Watches( 141 &infrav1.AzureCluster{}, 142 handler.EnqueueRequestsFromMapFunc(azureClusterMapper), 143 ). 144 // watch for changes in AzureManagedControlPlane resources 145 Watches( 146 &infrav1.AzureManagedControlPlane{}, 147 handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper), 148 ). 149 // watch for changes in KubeadmConfig (or any BootstrapConfig) to sync bootstrap token 150 Watches( 151 config, 152 handler.EnqueueRequestsFromMapFunc(BootstrapConfigToInfrastructureMapFunc(ctx, ampr.Client, log)), 153 builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), 154 ). 155 Watches( 156 &infrav1exp.AzureMachinePoolMachine{}, 157 handler.EnqueueRequestsFromMapFunc(AzureMachinePoolMachineMapper(mgr.GetScheme(), log)), 158 builder.WithPredicates( 159 MachinePoolMachineHasStateOrVersionChange(log), 160 predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue), 161 ), 162 ). 163 // Add a watch on clusterv1.Cluster object for unpause & ready notifications. 164 Watches( 165 &clusterv1.Cluster{}, 166 handler.EnqueueRequestsFromMapFunc(azureMachinePoolMapper), 167 builder.WithPredicates( 168 infracontroller.ClusterPauseChangeAndInfrastructureReady(log), 169 predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue), 170 ), 171 ). 172 Complete(r) 173 } 174 175 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete 176 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch 177 // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch 178 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete 179 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get 180 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch 181 // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch 182 // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch 183 // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch 184 185 // Reconcile idempotently gets, creates, and updates a machine pool. 186 func (ampr *AzureMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { 187 ctx, logger, done := tele.StartSpanWithLogger( 188 ctx, 189 "controllers.AzureMachinePoolReconciler.Reconcile", 190 tele.KVP("namespace", req.Namespace), 191 tele.KVP("name", req.Name), 192 tele.KVP("kind", infrav1.AzureMachinePoolKind), 193 ) 194 defer done() 195 ctx, cancel := context.WithTimeout(ctx, ampr.Timeouts.DefaultedLoopTimeout()) 196 defer cancel() 197 198 logger = logger.WithValues("namespace", req.Namespace, "azureMachinePool", req.Name) 199 200 azMachinePool := &infrav1exp.AzureMachinePool{} 201 err := ampr.Get(ctx, req.NamespacedName, azMachinePool) 202 if err != nil { 203 if apierrors.IsNotFound(err) { 204 return reconcile.Result{}, nil 205 } 206 return reconcile.Result{}, err 207 } 208 209 // Fetch the CAPI MachinePool. 210 machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampr.Client, azMachinePool.ObjectMeta) 211 if err != nil { 212 return reconcile.Result{}, err 213 } 214 if machinePool == nil { 215 logger.V(2).Info("MachinePool Controller has not yet set OwnerRef") 216 return reconcile.Result{}, nil 217 } 218 219 logger = logger.WithValues("machinePool", machinePool.Name) 220 221 // Fetch the Cluster. 222 cluster, err := util.GetClusterFromMetadata(ctx, ampr.Client, machinePool.ObjectMeta) 223 if err != nil { 224 logger.V(2).Info("MachinePool is missing cluster label or cluster does not exist") 225 return reconcile.Result{}, nil 226 } 227 228 logger = logger.WithValues("cluster", cluster.Name) 229 230 clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampr.Client, cluster, ampr.Timeouts) 231 if err != nil { 232 return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name) 233 } 234 235 // Create the machine pool scope 236 machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{ 237 Client: ampr.Client, 238 MachinePool: machinePool, 239 AzureMachinePool: azMachinePool, 240 ClusterScope: clusterScope, 241 }) 242 if err != nil { 243 return reconcile.Result{}, errors.Wrap(err, "failed to create machinepool scope") 244 } 245 246 // Always close the scope when exiting this function so we can persist any AzureMachine changes. 247 defer func() { 248 if err := machinePoolScope.Close(ctx); err != nil && reterr == nil { 249 reterr = err 250 } 251 }() 252 253 // Return early if the object or Cluster is paused. 254 if annotations.IsPaused(cluster, azMachinePool) { 255 logger.V(2).Info("AzureMachinePool or linked Cluster is marked as paused. Won't reconcile normally") 256 return ampr.reconcilePause(ctx, machinePoolScope) 257 } 258 259 // Handle deleted machine pools 260 if !azMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { 261 return ampr.reconcileDelete(ctx, machinePoolScope, clusterScope) 262 } 263 264 // Handle non-deleted machine pools 265 return ampr.reconcileNormal(ctx, machinePoolScope, cluster) 266 } 267 268 func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { 269 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileNormal") 270 defer done() 271 272 log.Info("Reconciling AzureMachinePool") 273 274 // If the AzureMachine is in an error state, return early. 275 if machinePoolScope.AzureMachinePool.Status.FailureReason != nil || machinePoolScope.AzureMachinePool.Status.FailureMessage != nil { 276 log.Info("Error state detected, skipping reconciliation") 277 return reconcile.Result{}, nil 278 } 279 280 // Register the finalizer immediately to avoid orphaning Azure resources on delete 281 needsPatch := controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) 282 needsPatch = machinePoolScope.SetInfrastructureMachineKind() || needsPatch 283 // Register the block-move annotation immediately to avoid moving un-paused ASO resources 284 needsPatch = infracontroller.AddBlockMoveAnnotation(machinePoolScope.AzureMachinePool) || needsPatch 285 if needsPatch { 286 if err := machinePoolScope.PatchObject(ctx); err != nil { 287 return reconcile.Result{}, err 288 } 289 } 290 291 if !cluster.Status.InfrastructureReady { 292 log.Info("Cluster infrastructure is not ready yet") 293 return reconcile.Result{}, nil 294 } 295 296 // Make sure bootstrap data is available and populated. 297 if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { 298 log.Info("Bootstrap data secret reference is not yet available") 299 return reconcile.Result{}, nil 300 } 301 302 var reconcileError azure.ReconcileError 303 304 // Initialize the cache to be used by the AzureMachine services. 305 err := machinePoolScope.InitMachinePoolCache(ctx) 306 if err != nil { 307 if errors.As(err, &reconcileError) && reconcileError.IsTerminal() { 308 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "SKUNotFound", errors.Wrap(err, "failed to initialize machinepool cache").Error()) 309 log.Error(err, "Failed to initialize machinepool cache") 310 machinePoolScope.SetFailureReason(capierrors.InvalidConfigurationMachineError) 311 machinePoolScope.SetFailureMessage(err) 312 machinePoolScope.SetNotReady() 313 return reconcile.Result{}, nil 314 } 315 return reconcile.Result{}, errors.Wrap(err, "failed to init machinepool scope cache") 316 } 317 318 ams, err := ampr.createAzureMachinePoolService(machinePoolScope) 319 if err != nil { 320 return reconcile.Result{}, errors.Wrap(err, "failed creating a newAzureMachinePoolService") 321 } 322 323 if err := ams.Reconcile(ctx); err != nil { 324 // Handle transient and terminal errors 325 var reconcileError azure.ReconcileError 326 if errors.As(err, &reconcileError) { 327 if reconcileError.IsTerminal() { 328 log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name()) 329 return reconcile.Result{}, nil 330 } 331 332 if reconcileError.IsTransient() { 333 if azure.IsOperationNotDoneError(reconcileError) { 334 log.V(2).Info(fmt.Sprintf("AzureMachinePool reconcile not done: %s", reconcileError.Error())) 335 } else { 336 log.V(2).Info(fmt.Sprintf("transient failure to reconcile AzureMachinePool, retrying: %s", reconcileError.Error())) 337 } 338 return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil 339 } 340 341 return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePool") 342 } 343 344 return reconcile.Result{}, err 345 } 346 347 log.V(2).Info("Scale Set reconciled", "id", 348 machinePoolScope.ProviderID(), "state", machinePoolScope.ProvisioningState()) 349 350 switch machinePoolScope.ProvisioningState() { 351 case infrav1.Deleting: 352 log.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID()) 353 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion") 354 case infrav1.Failed: 355 log.Info("Unexpected scale set failure", "id", machinePoolScope.ProviderID()) 356 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMFailure", "Unexpected Azure scale set failure") 357 } 358 359 if machinePoolScope.NeedsRequeue() { 360 return reconcile.Result{ 361 RequeueAfter: 30 * time.Second, 362 }, nil 363 } 364 365 return reconcile.Result{}, nil 366 } 367 368 //nolint:unparam // Always returns an empty struct for reconcile.Result 369 func (ampr *AzureMachinePoolReconciler) reconcilePause(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (reconcile.Result, error) { 370 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcilePause") 371 defer done() 372 373 log.Info("Reconciling AzureMachinePool pause") 374 375 amps, err := ampr.createAzureMachinePoolService(machinePoolScope) 376 if err != nil { 377 return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService") 378 } 379 380 if err := amps.Pause(ctx); err != nil { 381 return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 382 } 383 infracontroller.RemoveBlockMoveAnnotation(machinePoolScope.AzureMachinePool) 384 385 return reconcile.Result{}, nil 386 } 387 388 //nolint:unparam // Always returns an empty struct for reconcile.Result 389 func (ampr *AzureMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope infracontroller.ClusterScoper) (reconcile.Result, error) { 390 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileDelete") 391 defer done() 392 393 log.V(2).Info("handling deleted AzureMachinePool") 394 395 if infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) { 396 amps, err := ampr.createAzureMachinePoolService(machinePoolScope) 397 if err != nil { 398 return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService") 399 } 400 401 log.V(4).Info("deleting AzureMachinePool resource individually") 402 if err := amps.Delete(ctx); err != nil { 403 return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 404 } 405 } 406 407 // Block deletion until all AzureMachinePoolMachines are finished deleting. 408 ampms, err := machinePoolScope.GetMachinePoolMachines(ctx) 409 if err != nil { 410 return reconcile.Result{}, errors.Wrapf(err, "error finding AzureMachinePoolMachines while deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 411 } 412 413 if len(ampms) > 0 { 414 log.Info("AzureMachinePool still has dependent AzureMachinePoolMachines, deleting them first and requeing", "count", len(ampms)) 415 416 var errs []error 417 418 for _, ampm := range ampms { 419 if !ampm.GetDeletionTimestamp().IsZero() { 420 // Don't handle deleted child 421 continue 422 } 423 424 if err := machinePoolScope.DeleteMachine(ctx, ampm); err != nil { 425 err = errors.Wrapf(err, "error deleting AzureMachinePool %s/%s: failed to delete %s %s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.AzureMachinePool.Name, ampm.Namespace, ampm.Name) 426 log.Error(err, "Error deleting AzureMachinePoolMachine", "namespace", ampm.Namespace, "name", ampm.Name) 427 errs = append(errs, err) 428 } 429 } 430 431 if len(errs) > 0 { 432 return ctrl.Result{}, kerrors.NewAggregate(errs) 433 } 434 435 return reconcile.Result{}, nil 436 } 437 438 // Delete succeeded, remove finalizer 439 log.V(4).Info("removing finalizer for AzureMachinePool") 440 controllerutil.RemoveFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) 441 return reconcile.Result{}, nil 442 }