sigs.k8s.io/cluster-api-provider-azure@v1.17.0/exp/controllers/azuremachinepoolmachine_controller.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/tools/record"
	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/azure"
	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
	"sigs.k8s.io/cluster-api-provider-azure/azure/services/scalesetvms"
	infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
	infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	capierrors "sigs.k8s.io/cluster-api/errors"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/predicates"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

type (
	azureMachinePoolMachineReconcilerFactory func(*scope.MachinePoolMachineScope) (azure.Reconciler, error)

	// AzureMachinePoolMachineController handles Kubernetes change events for AzureMachinePoolMachine resources.
	AzureMachinePoolMachineController struct {
		client.Client
		Scheme            *runtime.Scheme
		Recorder          record.EventRecorder
		Timeouts          reconciler.Timeouts
		WatchFilterValue  string
		reconcilerFactory azureMachinePoolMachineReconcilerFactory
	}

	azureMachinePoolMachineReconciler struct {
		Scope              *scope.MachinePoolMachineScope
		scalesetVMsService *scalesetvms.Service
	}
)

// NewAzureMachinePoolMachineController creates a new AzureMachinePoolMachineController to handle updates to Azure Machine Pool Machines.
func NewAzureMachinePoolMachineController(c client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureMachinePoolMachineController {
	return &AzureMachinePoolMachineController{
		Client:            c,
		Recorder:          recorder,
		Timeouts:          timeouts,
		WatchFilterValue:  watchFilterValue,
		reconcilerFactory: newAzureMachinePoolMachineReconciler,
	}
}
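
// The sketch below is illustrative only and is not part of the upstream controller: it shows
// roughly how this controller is wired into a controller-runtime manager. The variables mgr
// and opts, and the recorder name, are assumptions rather than values defined in this file.
//
//	if err := NewAzureMachinePoolMachineController(
//		mgr.GetClient(),
//		mgr.GetEventRecorderFor("azuremachinepoolmachine-controller"),
//		reconciler.Timeouts{},
//		"", // watch filter value; empty means no label filtering
//	).SetupWithManager(ctx, mgr, infracontroller.Options{Options: opts}); err != nil {
//		return errors.Wrap(err, "failed to set up AzureMachinePoolMachine controller")
//	}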

// SetupWithManager initializes this controller with a manager.
func (ampmr *AzureMachinePoolMachineController) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx,
		"controllers.AzureMachinePoolMachineController.SetupWithManager",
		tele.KVP("controller", "AzureMachinePoolMachine"),
	)
	defer done()

	var r reconcile.Reconciler = ampmr
	if options.Cache != nil {
		r = coalescing.NewReconciler(ampmr, options.Cache, log)
	}

	return ctrl.NewControllerManagedBy(mgr).
		WithOptions(options.Options).
		For(&infrav1exp.AzureMachinePoolMachine{}).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue)).
		// Add a watch on AzureMachinePool for model changes.
		Watches(
			&infrav1exp.AzureMachinePool{},
			handler.EnqueueRequestsFromMapFunc(AzureMachinePoolToAzureMachinePoolMachines(ctx, mgr.GetClient(), log)),
			builder.WithPredicates(
				MachinePoolModelHasChanged(log),
				predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue),
			),
		).
		// Add a watch on CAPI Machines for MachinePool Machines.
		Watches(
			&clusterv1.Machine{},
			handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind("AzureMachinePoolMachine"))),
			builder.WithPredicates(
				predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue),
			),
		).
		Complete(r)
}

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;delete
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch

// Reconcile idempotently reconciles the state of an AzureMachinePoolMachine with its corresponding Azure scale set VM.
func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	ctx, logger, done := tele.StartSpanWithLogger(
		ctx,
		"controllers.AzureMachinePoolMachineController.Reconcile",
		tele.KVP("namespace", req.Namespace),
		tele.KVP("name", req.Name),
		tele.KVP("kind", "AzureMachinePoolMachine"),
	)
	defer done()

	logger = logger.WithValues("namespace", req.Namespace, "azureMachinePoolMachine", req.Name)

	ctx, cancel := context.WithTimeout(ctx, ampmr.Timeouts.DefaultedLoopTimeout())
	defer cancel()

	azureMachine := &infrav1exp.AzureMachinePoolMachine{}
	err := ampmr.Get(ctx, req.NamespacedName, azureMachine)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, err
	}
	logger.V(2).Info("Fetching cluster for AzureMachinePoolMachine", "ampm", azureMachine.Name)
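
	// What follows is a chain of ownership lookups before any Azure API call is made:
	// the owning Cluster (via the cluster label), the owning AzureMachinePool, the CAPI
	// MachinePool that owns that AzureMachinePool, and finally the CAPI Machine that owns
	// this AzureMachinePoolMachine. A missing owner either ends reconciliation early or,
	// when the owner is gone for good, triggers finalizer removal so the object can be
	// cleaned up.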

	// Fetch the Cluster.
	cluster, err := util.GetClusterFromMetadata(ctx, ampmr.Client, azureMachine.ObjectMeta)
	if err != nil {
		logger.Info("AzureMachinePoolMachine is missing cluster label or cluster does not exist")
		return reconcile.Result{}, nil
	}

	logger = logger.WithValues("cluster", cluster.Name)

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, azureMachine) {
		logger.Info("AzureMachinePoolMachine or linked Cluster is marked as paused. Won't reconcile")
		return ctrl.Result{}, nil
	}

	clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampmr.Client, cluster, ampmr.Timeouts)
	if err != nil {
		return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name)
	}

	logger.V(2).Info("Fetching AzureMachinePool with object meta", "meta", azureMachine.ObjectMeta)
	// Fetch the owning AzureMachinePool (VMSS).
	azureMachinePool, err := infracontroller.GetOwnerAzureMachinePool(ctx, ampmr.Client, azureMachine.ObjectMeta)
	if err != nil {
		if apierrors.IsNotFound(err) {
			logger.Info("AzureMachinePool not found, removing finalizer", "azureMachinePoolMachine", azureMachine.Name)
			controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
			return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
		}
		return reconcile.Result{}, err
	}
	if azureMachinePool == nil {
		logger.Info("AzureMachinePool not found, removing finalizer", "azureMachinePoolMachine", azureMachine.Name)
		controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
		return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
	}

	logger = logger.WithValues("azureMachinePool", azureMachinePool.Name)

	// Fetch the CAPI MachinePool.
	machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampmr.Client, azureMachinePool.ObjectMeta)
	if err != nil && !apierrors.IsNotFound(err) {
		return reconcile.Result{}, err
	}

	if machinePool != nil {
		logger = logger.WithValues("machinePool", machinePool.Name)
	}

	// Fetch the CAPI Machine.
	machine, err := util.GetOwnerMachine(ctx, ampmr.Client, azureMachine.ObjectMeta)
	if err != nil && !apierrors.IsNotFound(err) {
		return reconcile.Result{}, err
	}

	switch {
	case machine != nil:
		logger = logger.WithValues("machine", machine.Name)
	case !azureMachinePool.ObjectMeta.DeletionTimestamp.IsZero():
		logger.Info("AzureMachinePool is being deleted, removing finalizer")
		controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
		return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
	default:
		logger.Info("Waiting for Machine Controller to set OwnerRef on AzureMachinePoolMachine")
		return reconcile.Result{}, nil
	}

	// Create the machine pool machine scope.
	machineScope, err := scope.NewMachinePoolMachineScope(scope.MachinePoolMachineScopeParams{
		Client:                  ampmr.Client,
		MachinePool:             machinePool,
		AzureMachinePool:        azureMachinePool,
		AzureMachinePoolMachine: azureMachine,
		Machine:                 machine,
		ClusterScope:            clusterScope,
	})
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create scope")
	}
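
	// Note: machineScope.Close is responsible for persisting the scope's accumulated
	// changes (status and conditions) back to the API server, which is why its error is
	// surfaced through reterr below; failing to persist the scope fails the reconcile so
	// the changes are retried.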

	// Always close the scope when exiting this function so we can persist any AzureMachinePoolMachine changes.
	defer func() {
		if err := machineScope.Close(ctx); err != nil && reterr == nil {
			reterr = err
		}
	}()

	// Handle deleted machine pool machines.
	if !azureMachine.ObjectMeta.DeletionTimestamp.IsZero() {
		return ampmr.reconcileDelete(ctx, machineScope, clusterScope)
	}

	if !cluster.Status.InfrastructureReady {
		logger.Info("Cluster infrastructure is not ready yet")
		return reconcile.Result{}, nil
	}

	// Handle non-deleted machine pool machines.
	return ampmr.reconcileNormal(ctx, machineScope)
}

func (ampmr *AzureMachinePoolMachineController) reconcileNormal(ctx context.Context, machineScope *scope.MachinePoolMachineScope) (_ reconcile.Result, reterr error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolMachineController.reconcileNormal")
	defer done()

	log.Info("Reconciling AzureMachinePoolMachine")
	// If the AzureMachinePool is in an error state, return early.
	if machineScope.AzureMachinePool.Status.FailureReason != nil || machineScope.AzureMachinePool.Status.FailureMessage != nil {
		log.Info("Error state detected, skipping reconciliation")
		return reconcile.Result{}, nil
	}

	ampms, err := ampmr.reconcilerFactory(machineScope)
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create AzureMachinePoolMachine reconciler")
	}
	if err := ampms.Reconcile(ctx); err != nil {
		// Handle transient and terminal errors.
		var reconcileError azure.ReconcileError
		if errors.As(err, &reconcileError) {
			if reconcileError.IsTerminal() {
				log.Error(err, "failed to reconcile AzureMachinePoolMachine", "name", machineScope.Name())
				return reconcile.Result{}, nil
			}

			if reconcileError.IsTransient() {
				log.V(4).Info("failed to reconcile AzureMachinePoolMachine", "name", machineScope.Name(), "transient_error", err)
				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
			}

			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePoolMachine")
		}

		return reconcile.Result{}, err
	}

	state := machineScope.ProvisioningState()
	switch state {
	case infrav1.Failed:
		ampmr.Recorder.Eventf(machineScope.AzureMachinePoolMachine, corev1.EventTypeWarning, "FailedVMState", "Azure scale set VM is in failed state")
		machineScope.SetFailureReason(capierrors.UpdateMachineError)
		machineScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", state))
	case infrav1.Deleting:
		log.V(4).Info("deleting machine because state is Deleting", "machine", machineScope.Name())
		if err := ampmr.Client.Delete(ctx, machineScope.Machine); err != nil {
			return reconcile.Result{}, errors.Wrap(err, "failed to delete machine in Deleting state")
		}
	}

	log.V(2).Info(fmt.Sprintf("Scale Set VM is %s", state), "id", machineScope.ProviderID())

	bootstrappingCondition := conditions.Get(machineScope.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition)
	if bootstrappingCondition != nil && bootstrappingCondition.Reason == infrav1.BootstrapFailedReason {
		return reconcile.Result{}, nil
	}

	if !infrav1.IsTerminalProvisioningState(state) || !machineScope.IsReady() {
		log.V(2).Info("Requeuing", "state", state, "ready", machineScope.IsReady())
		// We are in a non-terminal state, retry in a bit.
		return reconcile.Result{
			RequeueAfter: 30 * time.Second,
		}, nil
	}

	return reconcile.Result{}, nil
}
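
// Outcome summary for reconcileNormal above (informational, derived from the code):
//
//	VM state Failed                      -> record a warning event, set failure reason/message
//	VM state Deleting                    -> delete the owner Machine so CAPI tears down this replica
//	bootstrap condition reports failure  -> stop requeuing and wait for remediation
//	non-terminal state or node not ready -> requeue after 30 seconds
//	terminal state and node ready        -> done, no requeue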

func (ampmr *AzureMachinePoolMachineController) reconcileDelete(ctx context.Context, machineScope *scope.MachinePoolMachineScope, clusterScope infracontroller.ClusterScoper) (_ reconcile.Result, reterr error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolMachineController.reconcileDelete")
	defer done()

	if !infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) {
		log.Info("Skipping VMSS VM deletion as the whole resource group is being deleted")

		controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
		return reconcile.Result{}, nil
	}

	if !machineScope.AzureMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
		log.Info("Skipping VMSS VM deletion as VMSS delete will delete individual instances")

		controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
		return reconcile.Result{}, nil
	}

	log.Info("Deleting AzureMachinePoolMachine")

	// Deleting a single machine:
	// 1) delete the infrastructure; node drain was already done by the owner Machine
	// 2) remove the finalizer

	ampms, err := ampmr.reconcilerFactory(machineScope)
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create AzureMachinePoolMachine reconciler")
	}
	if err := ampms.Delete(ctx); err != nil {
		// Handle transient and terminal errors.
		var reconcileError azure.ReconcileError
		if errors.As(err, &reconcileError) {
			if reconcileError.IsTerminal() {
				log.Error(err, "failed to delete AzureMachinePoolMachine", "name", machineScope.Name())
				return reconcile.Result{}, nil
			}

			if reconcileError.IsTransient() {
				log.V(4).Info("failed to delete AzureMachinePoolMachine", "name", machineScope.Name(), "transient_error", err)
				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
			}

			return reconcile.Result{}, errors.Wrap(err, "failed to delete AzureMachinePoolMachine")
		}

		return reconcile.Result{}, err
	}

	return reconcile.Result{}, nil
}

func newAzureMachinePoolMachineReconciler(scope *scope.MachinePoolMachineScope) (azure.Reconciler, error) {
	scaleSetVMsSvc, err := scalesetvms.NewService(scope)
	if err != nil {
		return nil, err
	}
	return &azureMachinePoolMachineReconciler{
		Scope:              scope,
		scalesetVMsService: scaleSetVMsSvc,
	}, nil
}

// Reconcile will reconcile the state of the Machine Pool Machine with the state of the Azure VMSS VM.
func (r *azureMachinePoolMachineReconciler) Reconcile(ctx context.Context) error {
	ctx, _, done := tele.StartSpanWithLogger(ctx, "controllers.azureMachinePoolMachineReconciler.Reconcile")
	defer done()

	if err := r.scalesetVMsService.Reconcile(ctx); err != nil {
		return errors.Wrap(err, "failed to reconcile scalesetVMs")
	}

	if err := r.Scope.UpdateNodeStatus(ctx); err != nil {
		return errors.Wrap(err, "failed to update VMSS VM node status")
	}

	if err := r.Scope.UpdateInstanceStatus(ctx); err != nil {
		return errors.Wrap(err, "failed to update VMSS VM instance status")
	}

	return nil
}
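
// Reconcile above and Delete below together satisfy the azure.Reconciler value returned by
// newAzureMachinePoolMachineReconciler, which is what the controller's reconcilerFactory
// field provides; see the note at the end of the file for how that seam can be faked in tests.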

// Delete will attempt to delete the Azure VMSS VM; node drain is handled by the owner Machine controller.
func (r *azureMachinePoolMachineReconciler) Delete(ctx context.Context) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.azureMachinePoolMachineReconciler.Delete")
	defer done()

	defer func() {
		if err := r.Scope.UpdateNodeStatus(ctx); err != nil {
			log.V(4).Info("failed to update VMSS VM node status during delete")
		}

		if err := r.Scope.UpdateInstanceStatus(ctx); err != nil {
			log.V(4).Info("failed to update VMSS VM instance status during delete")
		}
	}()

	if err := r.scalesetVMsService.Delete(ctx); err != nil {
		return errors.Wrap(err, "failed to delete scalesetVMs")
	}

	// No long-running operation in progress, so we are finished deleting the resource. Remove the finalizer.
	controllerutil.RemoveFinalizer(r.Scope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)

	return nil
}
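
// Testing note (not part of the upstream file): because the controller resolves its inner
// reconciler through the reconcilerFactory field, in-package tests can swap in a fake that
// satisfies azure.Reconciler without touching Azure. A minimal sketch, assuming a hand-written
// fake rather than any generated mock; fakeClient and recorder are assumed test doubles (for
// example a controller-runtime fake client and a record.FakeRecorder):
//
//	type fakeReconciler struct {
//		reconcileErr error
//		deleteErr    error
//	}
//
//	func (f *fakeReconciler) Reconcile(ctx context.Context) error { return f.reconcileErr }
//	func (f *fakeReconciler) Delete(ctx context.Context) error    { return f.deleteErr }
//
//	controller := NewAzureMachinePoolMachineController(fakeClient, recorder, reconciler.Timeouts{}, "")
//	controller.reconcilerFactory = func(*scope.MachinePoolMachineScope) (azure.Reconciler, error) {
//		return &fakeReconciler{}, nil
//	}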