sigs.k8s.io/cluster-api-provider-azure@v1.17.0/exp/controllers/azuremachinepoolmachine_controller.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	corev1 "k8s.io/api/core/v1"
    26  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    27  	"k8s.io/apimachinery/pkg/runtime"
    28  	"k8s.io/client-go/tools/record"
    29  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    30  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    31  	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
    32  	"sigs.k8s.io/cluster-api-provider-azure/azure/services/scalesetvms"
    33  	infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
    34  	infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
    35  	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
    36  	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
    37  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    38  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    39  	capierrors "sigs.k8s.io/cluster-api/errors"
    40  	"sigs.k8s.io/cluster-api/util"
    41  	"sigs.k8s.io/cluster-api/util/annotations"
    42  	"sigs.k8s.io/cluster-api/util/conditions"
    43  	"sigs.k8s.io/cluster-api/util/predicates"
    44  	ctrl "sigs.k8s.io/controller-runtime"
    45  	"sigs.k8s.io/controller-runtime/pkg/builder"
    46  	"sigs.k8s.io/controller-runtime/pkg/client"
    47  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    48  	"sigs.k8s.io/controller-runtime/pkg/handler"
    49  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    50  )
    51  
    52  type (
    53  	azureMachinePoolMachineReconcilerFactory func(*scope.MachinePoolMachineScope) (azure.Reconciler, error)
    54  
    55  	// AzureMachinePoolMachineController handles Kubernetes change events for AzureMachinePoolMachine resources.
    56  	AzureMachinePoolMachineController struct {
    57  		client.Client
    58  		Scheme            *runtime.Scheme
    59  		Recorder          record.EventRecorder
    60  		Timeouts          reconciler.Timeouts
    61  		WatchFilterValue  string
    62  		reconcilerFactory azureMachinePoolMachineReconcilerFactory
    63  	}
    64  
    65  	azureMachinePoolMachineReconciler struct {
    66  		Scope              *scope.MachinePoolMachineScope
    67  		scalesetVMsService *scalesetvms.Service
    68  	}
    69  )
    70  
    71  // NewAzureMachinePoolMachineController creates a new AzureMachinePoolMachineController to handle updates to Azure Machine Pool Machines.
    72  func NewAzureMachinePoolMachineController(c client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureMachinePoolMachineController {
    73  	return &AzureMachinePoolMachineController{
    74  		Client:            c,
    75  		Recorder:          recorder,
    76  		Timeouts:          timeouts,
    77  		WatchFilterValue:  watchFilterValue,
    78  		reconcilerFactory: newAzureMachinePoolMachineReconciler,
    79  	}
    80  }
    81  
    82  // SetupWithManager initializes this controller with a manager.
    83  func (ampmr *AzureMachinePoolMachineController) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error {
    84  	ctx, log, done := tele.StartSpanWithLogger(ctx,
    85  		"controllers.AzureMachinePoolMachineController.SetupWithManager",
    86  		tele.KVP("controller", "AzureMachinePoolMachine"),
    87  	)
    88  	defer done()
    89  
    90  	var r reconcile.Reconciler = ampmr
    91  	if options.Cache != nil {
    92  		r = coalescing.NewReconciler(ampmr, options.Cache, log)
    93  	}
    94  
    95  	return ctrl.NewControllerManagedBy(mgr).
    96  		WithOptions(options.Options).
    97  		For(&infrav1exp.AzureMachinePoolMachine{}).
    98  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue)).
    99  		// Add a watch on AzureMachinePool for model changes
   100  		Watches(
   101  			&infrav1exp.AzureMachinePool{},
   102  			handler.EnqueueRequestsFromMapFunc(AzureMachinePoolToAzureMachinePoolMachines(ctx, mgr.GetClient(), log)),
   103  			builder.WithPredicates(
   104  				MachinePoolModelHasChanged(log),
   105  				predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue),
   106  			),
   107  		).
   108  		// Add a watch on CAPI Machines for MachinePool Machines
   109  		Watches(
   110  			&clusterv1.Machine{},
   111  			handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind("AzureMachinePoolMachine"))),
   112  			builder.WithPredicates(
   113  				predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue),
   114  			),
   115  		).
   116  		Complete(r)
   117  }
   118  
   119  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch
   120  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get
   121  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
   122  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get;update;patch
   123  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get
   124  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;delete
   125  // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
   126  // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
   127  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
   128  
   129  // Reconcile idempotently gets, creates, and updates a machine pool.
   130  func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   131  	ctx, logger, done := tele.StartSpanWithLogger(
   132  		ctx,
   133  		"controllers.AzureMachinePoolMachineController.Reconcile",
   134  		tele.KVP("namespace", req.Namespace),
   135  		tele.KVP("name", req.Name),
   136  		tele.KVP("kind", "AzureMachinePoolMachine"),
   137  	)
   138  	defer done()
   139  
   140  	logger = logger.WithValues("namespace", req.Namespace, "azureMachinePoolMachine", req.Name)
   141  
   142  	ctx, cancel := context.WithTimeout(ctx, ampmr.Timeouts.DefaultedLoopTimeout())
   143  	defer cancel()
   144  
   145  	azureMachine := &infrav1exp.AzureMachinePoolMachine{}
   146  	err := ampmr.Get(ctx, req.NamespacedName, azureMachine)
   147  	if err != nil {
   148  		if apierrors.IsNotFound(err) {
   149  			return reconcile.Result{}, nil
   150  		}
   151  		return reconcile.Result{}, err
   152  	}
   153  	logger.V(2).Info("Fetching cluster for AzureMachinePoolMachine", "ampm", azureMachine.Name)
   154  
   155  	// Fetch the Cluster.
   156  	cluster, err := util.GetClusterFromMetadata(ctx, ampmr.Client, azureMachine.ObjectMeta)
   157  	if err != nil {
   158  		logger.Info("AzureMachinePoolMachine is missing cluster label or cluster does not exist")
   159  		return reconcile.Result{}, nil
   160  	}
   161  
   162  	logger = logger.WithValues("cluster", cluster.Name)
   163  
   164  	// Return early if the object or Cluster is paused.
   165  	if annotations.IsPaused(cluster, azureMachine) {
   166  		logger.Info("AzureMachinePoolMachine or linked Cluster is marked as paused. Won't reconcile")
   167  		return ctrl.Result{}, nil
   168  	}
   169  
   170  	clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampmr.Client, cluster, ampmr.Timeouts)
   171  	if err != nil {
   172  		return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name)
   173  	}
   174  
   175  	logger.V(2).Info("Fetching AzureMachinePool with object meta", "meta", azureMachine.ObjectMeta)
   176  	// Fetch the owning AzureMachinePool (VMSS)
   177  	azureMachinePool, err := infracontroller.GetOwnerAzureMachinePool(ctx, ampmr.Client, azureMachine.ObjectMeta)
   178  	if err != nil {
   179  		if apierrors.IsNotFound(err) {
   180  			logger.Info("AzureMachinePool not found error missing, removing finalizer", "azureMachinePoolMachine", azureMachine.Name)
   181  			controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   182  			return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
   183  		}
   184  		return reconcile.Result{}, err
   185  	}
   186  	if azureMachinePool == nil {
   187  		logger.Info("AzureMachinePool not found error missing, removing finalizer", "azureMachinePoolMachine", azureMachine.Name)
   188  		controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   189  		return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
   190  	}
   191  
   192  	logger = logger.WithValues("azureMachinePool", azureMachinePool.Name)
   193  
   194  	// Fetch the CAPI MachinePool.
   195  	machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampmr.Client, azureMachinePool.ObjectMeta)
   196  	if err != nil && !apierrors.IsNotFound(err) {
   197  		return reconcile.Result{}, err
   198  	}
   199  
   200  	if machinePool != nil {
   201  		logger = logger.WithValues("machinePool", machinePool.Name)
   202  	}
   203  
   204  	// Fetch the CAPI Machine.
   205  	machine, err := util.GetOwnerMachine(ctx, ampmr.Client, azureMachine.ObjectMeta)
   206  	if err != nil && !apierrors.IsNotFound(err) {
   207  		return reconcile.Result{}, err
   208  	}
   209  
   210  	switch {
   211  	case machine != nil:
   212  		logger = logger.WithValues("machine", machine.Name)
   213  	case !azureMachinePool.ObjectMeta.DeletionTimestamp.IsZero():
   214  		logger.Info("AzureMachinePool is being deleted, removing finalizer")
   215  		controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   216  		return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine)
   217  	default:
   218  		logger.Info("Waiting for Machine Controller to set OwnerRef on AzureMachinePoolMachine")
   219  		return reconcile.Result{}, nil
   220  	}
   221  
   222  	// Create the machine pool scope
   223  	machineScope, err := scope.NewMachinePoolMachineScope(scope.MachinePoolMachineScopeParams{
   224  		Client:                  ampmr.Client,
   225  		MachinePool:             machinePool,
   226  		AzureMachinePool:        azureMachinePool,
   227  		AzureMachinePoolMachine: azureMachine,
   228  		Machine:                 machine,
   229  		ClusterScope:            clusterScope,
   230  	})
   231  	if err != nil {
   232  		return reconcile.Result{}, errors.Wrap(err, "failed to create scope")
   233  	}
   234  
   235  	// Always close the scope when exiting this function so we can persist any AzureMachine changes.
   236  	defer func() {
   237  		if err := machineScope.Close(ctx); err != nil && reterr == nil {
   238  			reterr = err
   239  		}
   240  	}()
   241  
   242  	// Handle deleted machine pools machine
   243  	if !azureMachine.ObjectMeta.DeletionTimestamp.IsZero() {
   244  		return ampmr.reconcileDelete(ctx, machineScope, clusterScope)
   245  	}
   246  
   247  	if !cluster.Status.InfrastructureReady {
   248  		logger.Info("Cluster infrastructure is not ready yet")
   249  		return reconcile.Result{}, nil
   250  	}
   251  
   252  	// Handle non-deleted machine pools
   253  	return ampmr.reconcileNormal(ctx, machineScope)
   254  }
   255  
   256  func (ampmr *AzureMachinePoolMachineController) reconcileNormal(ctx context.Context, machineScope *scope.MachinePoolMachineScope) (_ reconcile.Result, reterr error) {
   257  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolMachineController.reconcileNormal")
   258  	defer done()
   259  
   260  	log.Info("Reconciling AzureMachinePoolMachine")
   261  	// If the AzureMachine is in an error state, return early.
   262  	if machineScope.AzureMachinePool.Status.FailureReason != nil || machineScope.AzureMachinePool.Status.FailureMessage != nil {
   263  		log.Info("Error state detected, skipping reconciliation")
   264  		return reconcile.Result{}, nil
   265  	}
   266  
   267  	ampms, err := ampmr.reconcilerFactory(machineScope)
   268  	if err != nil {
   269  		return reconcile.Result{}, errors.Wrap(err, "failed to create AzureMachinePoolMachine reconciler")
   270  	}
   271  	if err := ampms.Reconcile(ctx); err != nil {
   272  		// Handle transient and terminal errors
   273  		var reconcileError azure.ReconcileError
   274  		if errors.As(err, &reconcileError) {
   275  			if reconcileError.IsTerminal() {
   276  				log.Error(err, "failed to reconcile AzureMachinePool", "name", machineScope.Name())
   277  				return reconcile.Result{}, nil
   278  			}
   279  
   280  			if reconcileError.IsTransient() {
   281  				log.V(4).Info("failed to reconcile AzureMachinePoolMachine", "name", machineScope.Name(), "transient_error", err)
   282  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   283  			}
   284  
   285  			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePool")
   286  		}
   287  
   288  		return reconcile.Result{}, err
   289  	}
   290  
   291  	state := machineScope.ProvisioningState()
   292  	switch state {
   293  	case infrav1.Failed:
   294  		ampmr.Recorder.Eventf(machineScope.AzureMachinePoolMachine, corev1.EventTypeWarning, "FailedVMState", "Azure scale set VM is in failed state")
   295  		machineScope.SetFailureReason(capierrors.UpdateMachineError)
   296  		machineScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", state))
   297  	case infrav1.Deleting:
   298  		log.V(4).Info("deleting machine because state is Deleting", "machine", machineScope.Name())
   299  		if err := ampmr.Client.Delete(ctx, machineScope.Machine); err != nil {
   300  			return reconcile.Result{}, errors.Wrap(err, "machine failed to be deleted when deleting")
   301  		}
   302  	}
   303  
   304  	log.V(2).Info(fmt.Sprintf("Scale Set VM is %s", state), "id", machineScope.ProviderID())
   305  
   306  	bootstrappingCondition := conditions.Get(machineScope.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition)
   307  	if bootstrappingCondition != nil && bootstrappingCondition.Reason == infrav1.BootstrapFailedReason {
   308  		return reconcile.Result{}, nil
   309  	}
   310  
   311  	if !infrav1.IsTerminalProvisioningState(state) || !machineScope.IsReady() {
   312  		log.V(2).Info("Requeuing", "state", state, "ready", machineScope.IsReady())
   313  		// we are in a non-terminal state, retry in a bit
   314  		return reconcile.Result{
   315  			RequeueAfter: 30 * time.Second,
   316  		}, nil
   317  	}
   318  
   319  	return reconcile.Result{}, nil
   320  }
   321  
   322  func (ampmr *AzureMachinePoolMachineController) reconcileDelete(ctx context.Context, machineScope *scope.MachinePoolMachineScope, clusterScope infracontroller.ClusterScoper) (_ reconcile.Result, reterr error) {
   323  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolMachineController.reconcileDelete")
   324  	defer done()
   325  
   326  	if !infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) {
   327  		log.Info("Skipping VMSS VM deletion as the whole resource group is being deleted")
   328  
   329  		controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   330  		return reconcile.Result{}, nil
   331  	}
   332  
   333  	if !machineScope.AzureMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
   334  		log.Info("Skipping VMSS VM deletion as VMSS delete will delete individual instances")
   335  
   336  		controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   337  		return reconcile.Result{}, nil
   338  	}
   339  
   340  	log.Info("Deleting AzureMachinePoolMachine")
   341  
   342  	// deleting a single machine
   343  	// 1) delete the infrastructure, node drain already done by owner Machine
   344  	// 2) remove finalizer
   345  
   346  	ampms, err := ampmr.reconcilerFactory(machineScope)
   347  	if err != nil {
   348  		return reconcile.Result{}, errors.Wrap(err, "failed to create AzureMachinePoolMachine reconciler")
   349  	}
   350  	if err := ampms.Delete(ctx); err != nil {
   351  		// Handle transient and terminal errors
   352  		var reconcileError azure.ReconcileError
   353  		if errors.As(err, &reconcileError) {
   354  			if reconcileError.IsTerminal() {
   355  				log.Error(err, "failed to delete AzureMachinePoolMachine", "name", machineScope.Name())
   356  				return reconcile.Result{}, nil
   357  			}
   358  
   359  			if reconcileError.IsTransient() {
   360  				log.V(4).Info("failed to delete AzureMachinePoolMachine", "name", machineScope.Name(), "transient_error", err)
   361  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   362  			}
   363  
   364  			return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile AzureMachinePool")
   365  		}
   366  
   367  		return reconcile.Result{}, err
   368  	}
   369  
   370  	return reconcile.Result{}, nil
   371  }
   372  
   373  func newAzureMachinePoolMachineReconciler(scope *scope.MachinePoolMachineScope) (azure.Reconciler, error) {
   374  	scaleSetVMsSvc, err := scalesetvms.NewService(scope)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  	return &azureMachinePoolMachineReconciler{
   379  		Scope:              scope,
   380  		scalesetVMsService: scaleSetVMsSvc,
   381  	}, nil
   382  }
   383  
   384  // Reconcile will reconcile the state of the Machine Pool Machine with the state of the Azure VMSS VM.
   385  func (r *azureMachinePoolMachineReconciler) Reconcile(ctx context.Context) error {
   386  	ctx, _, done := tele.StartSpanWithLogger(ctx, "controllers.azureMachinePoolMachineReconciler.Reconcile")
   387  	defer done()
   388  
   389  	if err := r.scalesetVMsService.Reconcile(ctx); err != nil {
   390  		return errors.Wrap(err, "failed to reconcile scalesetVMs")
   391  	}
   392  
   393  	if err := r.Scope.UpdateNodeStatus(ctx); err != nil {
   394  		return errors.Wrap(err, "failed to update VMSS VM node status")
   395  	}
   396  
   397  	if err := r.Scope.UpdateInstanceStatus(ctx); err != nil {
   398  		return errors.Wrap(err, "failed to update VMSS VM instance status")
   399  	}
   400  
   401  	return nil
   402  }
   403  
   404  // Delete will attempt to drain and delete the Azure VMSS VM.
   405  func (r *azureMachinePoolMachineReconciler) Delete(ctx context.Context) error {
   406  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.azureMachinePoolMachineReconciler.Delete")
   407  	defer done()
   408  
   409  	defer func() {
   410  		if err := r.Scope.UpdateNodeStatus(ctx); err != nil {
   411  			log.V(4).Info("failed to update VMSS VM node status during delete")
   412  		}
   413  
   414  		if err := r.Scope.UpdateInstanceStatus(ctx); err != nil {
   415  			log.V(4).Info("failed to update VMSS VM instance status during delete")
   416  		}
   417  	}()
   418  
   419  	if err := r.scalesetVMsService.Delete(ctx); err != nil {
   420  		return errors.Wrap(err, "failed to reconcile scalesetVMs")
   421  	}
   422  
   423  	// no long running operation, so we are finished deleting the resource. Remove the finalizer.
   424  	controllerutil.RemoveFinalizer(r.Scope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer)
   425  
   426  	return nil
   427  }