sigs.k8s.io/cluster-api-provider-azure@v1.14.3/exp/controllers/azuremachinepool_controller.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controllers 18 19 import ( 20 "context" 21 "time" 22 23 "github.com/pkg/errors" 24 corev1 "k8s.io/api/core/v1" 25 apierrors "k8s.io/apimachinery/pkg/api/errors" 26 "k8s.io/apimachinery/pkg/runtime" 27 kerrors "k8s.io/apimachinery/pkg/util/errors" 28 "k8s.io/client-go/tools/record" 29 infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" 30 "sigs.k8s.io/cluster-api-provider-azure/azure" 31 "sigs.k8s.io/cluster-api-provider-azure/azure/scope" 32 infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers" 33 infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" 34 "sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing" 35 "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" 36 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 37 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 38 kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" 39 capierrors "sigs.k8s.io/cluster-api/errors" 40 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 41 "sigs.k8s.io/cluster-api/util" 42 "sigs.k8s.io/cluster-api/util/annotations" 43 "sigs.k8s.io/cluster-api/util/predicates" 44 ctrl "sigs.k8s.io/controller-runtime" 45 "sigs.k8s.io/controller-runtime/pkg/builder" 46 "sigs.k8s.io/controller-runtime/pkg/client" 47 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 48 "sigs.k8s.io/controller-runtime/pkg/handler" 49 "sigs.k8s.io/controller-runtime/pkg/predicate" 50 "sigs.k8s.io/controller-runtime/pkg/reconcile" 51 "sigs.k8s.io/controller-runtime/pkg/source" 52 ) 53 54 type ( 55 // AzureMachinePoolReconciler reconciles an AzureMachinePool object. 56 AzureMachinePoolReconciler struct { 57 client.Client 58 Scheme *runtime.Scheme 59 Recorder record.EventRecorder 60 Timeouts reconciler.Timeouts 61 WatchFilterValue string 62 createAzureMachinePoolService azureMachinePoolServiceCreator 63 } 64 65 // annotationReaderWriter provides an interface to read and write annotations. 66 annotationReaderWriter interface { 67 GetAnnotations() map[string]string 68 SetAnnotations(annotations map[string]string) 69 } 70 ) 71 72 type azureMachinePoolServiceCreator func(machinePoolScope *scope.MachinePoolScope) (*azureMachinePoolService, error) 73 74 // NewAzureMachinePoolReconciler returns a new AzureMachinePoolReconciler instance. 75 func NewAzureMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureMachinePoolReconciler { 76 ampr := &AzureMachinePoolReconciler{ 77 Client: client, 78 Recorder: recorder, 79 Timeouts: timeouts, 80 WatchFilterValue: watchFilterValue, 81 } 82 83 ampr.createAzureMachinePoolService = newAzureMachinePoolService 84 85 return ampr 86 } 87 88 // SetupWithManager initializes this controller with a manager. 89 func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error { 90 ctx, log, done := tele.StartSpanWithLogger(ctx, 91 "controllers.AzureMachinePoolReconciler.SetupWithManager", 92 tele.KVP("controller", "AzureMachinePool"), 93 ) 94 defer done() 95 96 var r reconcile.Reconciler = ampr 97 if options.Cache != nil { 98 r = coalescing.NewReconciler(ampr, options.Cache, log) 99 } 100 101 // create mappers to transform incoming AzureClusters and AzureManagedClusters into AzureMachinePool requests 102 azureClusterMapper, err := AzureClusterToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log) 103 if err != nil { 104 return errors.Wrapf(err, "failed to create AzureCluster to AzureMachinePools mapper") 105 } 106 azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log) 107 if err != nil { 108 return errors.Wrapf(err, "failed to create AzureManagedCluster to AzureMachinePools mapper") 109 } 110 111 c, err := ctrl.NewControllerManagedBy(mgr). 112 WithOptions(options.Options). 113 For(&infrav1exp.AzureMachinePool{}). 114 WithEventFilter(predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue)). 115 // watch for changes in CAPI MachinePool resources 116 Watches( 117 &expv1.MachinePool{}, 118 handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind(infrav1.AzureMachinePoolKind), log)), 119 ). 120 // watch for changes in AzureCluster resources 121 Watches( 122 &infrav1.AzureCluster{}, 123 handler.EnqueueRequestsFromMapFunc(azureClusterMapper), 124 ). 125 // watch for changes in AzureManagedControlPlane resources 126 Watches( 127 &infrav1.AzureManagedControlPlane{}, 128 handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper), 129 ). 130 // watch for changes in KubeadmConfig to sync bootstrap token 131 Watches( 132 &kubeadmv1.KubeadmConfig{}, 133 handler.EnqueueRequestsFromMapFunc(KubeadmConfigToInfrastructureMapFunc(ctx, ampr.Client, log)), 134 builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), 135 ). 136 Build(r) 137 if err != nil { 138 return errors.Wrap(err, "error creating controller") 139 } 140 141 if err := c.Watch( 142 source.Kind(mgr.GetCache(), &infrav1exp.AzureMachinePoolMachine{}), 143 handler.EnqueueRequestsFromMapFunc(AzureMachinePoolMachineMapper(mgr.GetScheme(), log)), 144 MachinePoolMachineHasStateOrVersionChange(log), 145 predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue), 146 ); err != nil { 147 return errors.Wrap(err, "failed adding a watch for AzureMachinePoolMachine") 148 } 149 150 azureMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ampr.Client, &infrav1exp.AzureMachinePoolList{}, mgr.GetScheme()) 151 if err != nil { 152 return errors.Wrap(err, "failed to create mapper for Cluster to AzureMachines") 153 } 154 155 // Add a watch on clusterv1.Cluster object for unpause & ready notifications. 156 if err := c.Watch( 157 source.Kind(mgr.GetCache(), &clusterv1.Cluster{}), 158 handler.EnqueueRequestsFromMapFunc(azureMachinePoolMapper), 159 infracontroller.ClusterPauseChangeAndInfrastructureReady(log), 160 predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue), 161 ); err != nil { 162 return errors.Wrap(err, "failed adding a watch for ready clusters") 163 } 164 165 return nil 166 } 167 168 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete 169 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch 170 // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch 171 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete 172 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get 173 // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch 174 // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch 175 // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch 176 // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch 177 178 // Reconcile idempotently gets, creates, and updates a machine pool. 179 func (ampr *AzureMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { 180 ctx, logger, done := tele.StartSpanWithLogger( 181 ctx, 182 "controllers.AzureMachinePoolReconciler.Reconcile", 183 tele.KVP("namespace", req.Namespace), 184 tele.KVP("name", req.Name), 185 tele.KVP("kind", infrav1.AzureMachinePoolKind), 186 ) 187 defer done() 188 ctx, cancel := context.WithTimeout(ctx, ampr.Timeouts.DefaultedLoopTimeout()) 189 defer cancel() 190 191 logger = logger.WithValues("namespace", req.Namespace, "azureMachinePool", req.Name) 192 193 azMachinePool := &infrav1exp.AzureMachinePool{} 194 err := ampr.Get(ctx, req.NamespacedName, azMachinePool) 195 if err != nil { 196 if apierrors.IsNotFound(err) { 197 return reconcile.Result{}, nil 198 } 199 return reconcile.Result{}, err 200 } 201 202 // Fetch the CAPI MachinePool. 203 machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampr.Client, azMachinePool.ObjectMeta) 204 if err != nil { 205 return reconcile.Result{}, err 206 } 207 if machinePool == nil { 208 logger.V(2).Info("MachinePool Controller has not yet set OwnerRef") 209 return reconcile.Result{}, nil 210 } 211 212 logger = logger.WithValues("machinePool", machinePool.Name) 213 214 // Fetch the Cluster. 215 cluster, err := util.GetClusterFromMetadata(ctx, ampr.Client, machinePool.ObjectMeta) 216 if err != nil { 217 logger.V(2).Info("MachinePool is missing cluster label or cluster does not exist") 218 return reconcile.Result{}, nil 219 } 220 221 logger = logger.WithValues("cluster", cluster.Name) 222 223 clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampr.Client, cluster, ampr.Timeouts) 224 if err != nil { 225 return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name) 226 } 227 228 // Create the machine pool scope 229 machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{ 230 Client: ampr.Client, 231 MachinePool: machinePool, 232 AzureMachinePool: azMachinePool, 233 ClusterScope: clusterScope, 234 }) 235 if err != nil { 236 return reconcile.Result{}, errors.Wrap(err, "failed to create machinepool scope") 237 } 238 239 // Always close the scope when exiting this function so we can persist any AzureMachine changes. 240 defer func() { 241 if err := machinePoolScope.Close(ctx); err != nil && reterr == nil { 242 reterr = err 243 } 244 }() 245 246 // Return early if the object or Cluster is paused. 247 if annotations.IsPaused(cluster, azMachinePool) { 248 logger.V(2).Info("AzureMachinePool or linked Cluster is marked as paused. Won't reconcile normally") 249 return ampr.reconcilePause(ctx, machinePoolScope) 250 } 251 252 // Handle deleted machine pools 253 if !azMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { 254 return ampr.reconcileDelete(ctx, machinePoolScope, clusterScope) 255 } 256 257 // Handle non-deleted machine pools 258 return ampr.reconcileNormal(ctx, machinePoolScope, cluster) 259 } 260 261 func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { 262 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileNormal") 263 defer done() 264 265 log.Info("Reconciling AzureMachinePool") 266 267 // If the AzureMachine is in an error state, return early. 268 if machinePoolScope.AzureMachinePool.Status.FailureReason != nil || machinePoolScope.AzureMachinePool.Status.FailureMessage != nil { 269 log.Info("Error state detected, skipping reconciliation") 270 return reconcile.Result{}, nil 271 } 272 273 // Register the finalizer immediately to avoid orphaning Azure resources on delete 274 needsPatch := controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) 275 needsPatch = machinePoolScope.SetInfrastructureMachineKind() || needsPatch 276 // Register the block-move annotation immediately to avoid moving un-paused ASO resources 277 needsPatch = infracontroller.AddBlockMoveAnnotation(machinePoolScope.AzureMachinePool) || needsPatch 278 if needsPatch { 279 if err := machinePoolScope.PatchObject(ctx); err != nil { 280 return reconcile.Result{}, err 281 } 282 } 283 284 if !cluster.Status.InfrastructureReady { 285 log.Info("Cluster infrastructure is not ready yet") 286 return reconcile.Result{}, nil 287 } 288 289 // Make sure bootstrap data is available and populated. 290 if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { 291 log.Info("Bootstrap data secret reference is not yet available") 292 return reconcile.Result{}, nil 293 } 294 295 var reconcileError azure.ReconcileError 296 297 // Initialize the cache to be used by the AzureMachine services. 298 err := machinePoolScope.InitMachinePoolCache(ctx) 299 if err != nil { 300 if errors.As(err, &reconcileError) && reconcileError.IsTerminal() { 301 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "SKUNotFound", errors.Wrap(err, "failed to initialize machinepool cache").Error()) 302 log.Error(err, "Failed to initialize machinepool cache") 303 machinePoolScope.SetFailureReason(capierrors.InvalidConfigurationMachineError) 304 machinePoolScope.SetFailureMessage(err) 305 machinePoolScope.SetNotReady() 306 return reconcile.Result{}, nil 307 } 308 return reconcile.Result{}, errors.Wrap(err, "failed to init machinepool scope cache") 309 } 310 311 ams, err := ampr.createAzureMachinePoolService(machinePoolScope) 312 if err != nil { 313 return reconcile.Result{}, errors.Wrap(err, "failed creating a newAzureMachinePoolService") 314 } 315 316 if err := ams.Reconcile(ctx); err != nil { 317 // Handle transient and terminal errors 318 var reconcileError azure.ReconcileError 319 if errors.As(err, &reconcileError) { 320 if reconcileError.IsTerminal() { 321 log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name()) 322 return reconcile.Result{}, nil 323 } 324 325 if reconcileError.IsTransient() { 326 log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name()) 327 return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil 328 } 329 330 return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePool") 331 } 332 333 return reconcile.Result{}, err 334 } 335 336 log.V(2).Info("Scale Set reconciled", "id", 337 machinePoolScope.ProviderID(), "state", machinePoolScope.ProvisioningState()) 338 339 switch machinePoolScope.ProvisioningState() { 340 case infrav1.Deleting: 341 log.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID()) 342 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion") 343 case infrav1.Failed: 344 log.Info("Unexpected scale set failure", "id", machinePoolScope.ProviderID()) 345 ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMFailure", "Unexpected Azure scale set failure") 346 } 347 348 if machinePoolScope.NeedsRequeue() { 349 return reconcile.Result{ 350 RequeueAfter: 30 * time.Second, 351 }, nil 352 } 353 354 return reconcile.Result{}, nil 355 } 356 357 //nolint:unparam // Always returns an empty struct for reconcile.Result 358 func (ampr *AzureMachinePoolReconciler) reconcilePause(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (reconcile.Result, error) { 359 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcilePause") 360 defer done() 361 362 log.Info("Reconciling AzureMachinePool pause") 363 364 amps, err := ampr.createAzureMachinePoolService(machinePoolScope) 365 if err != nil { 366 return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService") 367 } 368 369 if err := amps.Pause(ctx); err != nil { 370 return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 371 } 372 infracontroller.RemoveBlockMoveAnnotation(machinePoolScope.AzureMachinePool) 373 374 return reconcile.Result{}, nil 375 } 376 377 //nolint:unparam // Always returns an empty struct for reconcile.Result 378 func (ampr *AzureMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope infracontroller.ClusterScoper) (reconcile.Result, error) { 379 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileDelete") 380 defer done() 381 382 log.V(2).Info("handling deleted AzureMachinePool") 383 384 if infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) { 385 amps, err := ampr.createAzureMachinePoolService(machinePoolScope) 386 if err != nil { 387 return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService") 388 } 389 390 log.V(4).Info("deleting AzureMachinePool resource individually") 391 if err := amps.Delete(ctx); err != nil { 392 return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 393 } 394 } 395 396 // Block deletion until all AzureMachinePoolMachines are finished deleting. 397 ampms, err := machinePoolScope.GetMachinePoolMachines(ctx) 398 if err != nil { 399 return reconcile.Result{}, errors.Wrapf(err, "error finding AzureMachinePoolMachines while deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) 400 } 401 402 if len(ampms) > 0 { 403 log.Info("AzureMachinePool still has dependent AzureMachinePoolMachines, deleting them first and requeing", "count", len(ampms)) 404 405 var errs []error 406 407 for _, ampm := range ampms { 408 if !ampm.GetDeletionTimestamp().IsZero() { 409 // Don't handle deleted child 410 continue 411 } 412 413 if err := machinePoolScope.DeleteMachine(ctx, ampm); err != nil { 414 err = errors.Wrapf(err, "error deleting AzureMachinePool %s/%s: failed to delete %s %s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.AzureMachinePool.Name, ampm.Namespace, ampm.Name) 415 log.Error(err, "Error deleting AzureMachinePoolMachine", "namespace", ampm.Namespace, "name", ampm.Name) 416 errs = append(errs, err) 417 } 418 } 419 420 if len(errs) > 0 { 421 return ctrl.Result{}, kerrors.NewAggregate(errs) 422 } 423 424 return reconcile.Result{}, nil 425 } 426 427 // Delete succeeded, remove finalizer 428 log.V(4).Info("removing finalizer for AzureMachinePool") 429 controllerutil.RemoveFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) 430 return reconcile.Result{}, nil 431 }