sigs.k8s.io/cluster-api-provider-azure@v1.14.3/exp/controllers/azuremachinepool_controller.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"time"
    22  
    23  	"github.com/pkg/errors"
    24  	corev1 "k8s.io/api/core/v1"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	"k8s.io/apimachinery/pkg/runtime"
    27  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    28  	"k8s.io/client-go/tools/record"
    29  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    30  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    31  	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
    32  	infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
    33  	infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
    34  	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
    35  	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
    36  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    37  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    38  	kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
    39  	capierrors "sigs.k8s.io/cluster-api/errors"
    40  	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    41  	"sigs.k8s.io/cluster-api/util"
    42  	"sigs.k8s.io/cluster-api/util/annotations"
    43  	"sigs.k8s.io/cluster-api/util/predicates"
    44  	ctrl "sigs.k8s.io/controller-runtime"
    45  	"sigs.k8s.io/controller-runtime/pkg/builder"
    46  	"sigs.k8s.io/controller-runtime/pkg/client"
    47  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    48  	"sigs.k8s.io/controller-runtime/pkg/handler"
    49  	"sigs.k8s.io/controller-runtime/pkg/predicate"
    50  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    51  	"sigs.k8s.io/controller-runtime/pkg/source"
    52  )
    53  
    54  type (
    55  	// AzureMachinePoolReconciler reconciles an AzureMachinePool object.
    56  	AzureMachinePoolReconciler struct {
    57  		client.Client
    58  		Scheme                        *runtime.Scheme
    59  		Recorder                      record.EventRecorder
    60  		Timeouts                      reconciler.Timeouts
    61  		WatchFilterValue              string
    62  		createAzureMachinePoolService azureMachinePoolServiceCreator
    63  	}
    64  
    65  	// annotationReaderWriter provides an interface to read and write annotations.
    66  	annotationReaderWriter interface {
    67  		GetAnnotations() map[string]string
    68  		SetAnnotations(annotations map[string]string)
    69  	}
    70  )
    71  
    72  type azureMachinePoolServiceCreator func(machinePoolScope *scope.MachinePoolScope) (*azureMachinePoolService, error)
    73  
    74  // NewAzureMachinePoolReconciler returns a new AzureMachinePoolReconciler instance.
    75  func NewAzureMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureMachinePoolReconciler {
    76  	ampr := &AzureMachinePoolReconciler{
    77  		Client:           client,
    78  		Recorder:         recorder,
    79  		Timeouts:         timeouts,
    80  		WatchFilterValue: watchFilterValue,
    81  	}
    82  
    83  	ampr.createAzureMachinePoolService = newAzureMachinePoolService
    84  
    85  	return ampr
    86  }
    87  
    88  // SetupWithManager initializes this controller with a manager.
    89  func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error {
    90  	ctx, log, done := tele.StartSpanWithLogger(ctx,
    91  		"controllers.AzureMachinePoolReconciler.SetupWithManager",
    92  		tele.KVP("controller", "AzureMachinePool"),
    93  	)
    94  	defer done()
    95  
    96  	var r reconcile.Reconciler = ampr
    97  	if options.Cache != nil {
    98  		r = coalescing.NewReconciler(ampr, options.Cache, log)
    99  	}
   100  
   101  	// create mappers to transform incoming AzureClusters and AzureManagedClusters into AzureMachinePool requests
   102  	azureClusterMapper, err := AzureClusterToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log)
   103  	if err != nil {
   104  		return errors.Wrapf(err, "failed to create AzureCluster to AzureMachinePools mapper")
   105  	}
   106  	azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log)
   107  	if err != nil {
   108  		return errors.Wrapf(err, "failed to create AzureManagedCluster to AzureMachinePools mapper")
   109  	}
   110  
   111  	c, err := ctrl.NewControllerManagedBy(mgr).
   112  		WithOptions(options.Options).
   113  		For(&infrav1exp.AzureMachinePool{}).
   114  		WithEventFilter(predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue)).
   115  		// watch for changes in CAPI MachinePool resources
   116  		Watches(
   117  			&expv1.MachinePool{},
   118  			handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind(infrav1.AzureMachinePoolKind), log)),
   119  		).
   120  		// watch for changes in AzureCluster resources
   121  		Watches(
   122  			&infrav1.AzureCluster{},
   123  			handler.EnqueueRequestsFromMapFunc(azureClusterMapper),
   124  		).
   125  		// watch for changes in AzureManagedControlPlane resources
   126  		Watches(
   127  			&infrav1.AzureManagedControlPlane{},
   128  			handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper),
   129  		).
   130  		// watch for changes in KubeadmConfig to sync bootstrap token
   131  		Watches(
   132  			&kubeadmv1.KubeadmConfig{},
   133  			handler.EnqueueRequestsFromMapFunc(KubeadmConfigToInfrastructureMapFunc(ctx, ampr.Client, log)),
   134  			builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}),
   135  		).
   136  		Build(r)
   137  	if err != nil {
   138  		return errors.Wrap(err, "error creating controller")
   139  	}
   140  
   141  	if err := c.Watch(
   142  		source.Kind(mgr.GetCache(), &infrav1exp.AzureMachinePoolMachine{}),
   143  		handler.EnqueueRequestsFromMapFunc(AzureMachinePoolMachineMapper(mgr.GetScheme(), log)),
   144  		MachinePoolMachineHasStateOrVersionChange(log),
   145  		predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue),
   146  	); err != nil {
   147  		return errors.Wrap(err, "failed adding a watch for AzureMachinePoolMachine")
   148  	}
   149  
   150  	azureMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ampr.Client, &infrav1exp.AzureMachinePoolList{}, mgr.GetScheme())
   151  	if err != nil {
   152  		return errors.Wrap(err, "failed to create mapper for Cluster to AzureMachines")
   153  	}
   154  
   155  	// Add a watch on clusterv1.Cluster object for unpause & ready notifications.
   156  	if err := c.Watch(
   157  		source.Kind(mgr.GetCache(), &clusterv1.Cluster{}),
   158  		handler.EnqueueRequestsFromMapFunc(azureMachinePoolMapper),
   159  		infracontroller.ClusterPauseChangeAndInfrastructureReady(log),
   160  		predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue),
   161  	); err != nil {
   162  		return errors.Wrap(err, "failed adding a watch for ready clusters")
   163  	}
   164  
   165  	return nil
   166  }
   167  
   168  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete
   169  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch
   170  // +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch
   171  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
   172  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get
   173  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch
   174  // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
   175  // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
   176  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
   177  
   178  // Reconcile idempotently gets, creates, and updates a machine pool.
   179  func (ampr *AzureMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   180  	ctx, logger, done := tele.StartSpanWithLogger(
   181  		ctx,
   182  		"controllers.AzureMachinePoolReconciler.Reconcile",
   183  		tele.KVP("namespace", req.Namespace),
   184  		tele.KVP("name", req.Name),
   185  		tele.KVP("kind", infrav1.AzureMachinePoolKind),
   186  	)
   187  	defer done()
   188  	ctx, cancel := context.WithTimeout(ctx, ampr.Timeouts.DefaultedLoopTimeout())
   189  	defer cancel()
   190  
   191  	logger = logger.WithValues("namespace", req.Namespace, "azureMachinePool", req.Name)
   192  
   193  	azMachinePool := &infrav1exp.AzureMachinePool{}
   194  	err := ampr.Get(ctx, req.NamespacedName, azMachinePool)
   195  	if err != nil {
   196  		if apierrors.IsNotFound(err) {
   197  			return reconcile.Result{}, nil
   198  		}
   199  		return reconcile.Result{}, err
   200  	}
   201  
   202  	// Fetch the CAPI MachinePool.
   203  	machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampr.Client, azMachinePool.ObjectMeta)
   204  	if err != nil {
   205  		return reconcile.Result{}, err
   206  	}
   207  	if machinePool == nil {
   208  		logger.V(2).Info("MachinePool Controller has not yet set OwnerRef")
   209  		return reconcile.Result{}, nil
   210  	}
   211  
   212  	logger = logger.WithValues("machinePool", machinePool.Name)
   213  
   214  	// Fetch the Cluster.
   215  	cluster, err := util.GetClusterFromMetadata(ctx, ampr.Client, machinePool.ObjectMeta)
   216  	if err != nil {
   217  		logger.V(2).Info("MachinePool is missing cluster label or cluster does not exist")
   218  		return reconcile.Result{}, nil
   219  	}
   220  
   221  	logger = logger.WithValues("cluster", cluster.Name)
   222  
   223  	clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampr.Client, cluster, ampr.Timeouts)
   224  	if err != nil {
   225  		return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name)
   226  	}
   227  
   228  	// Create the machine pool scope
   229  	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
   230  		Client:           ampr.Client,
   231  		MachinePool:      machinePool,
   232  		AzureMachinePool: azMachinePool,
   233  		ClusterScope:     clusterScope,
   234  	})
   235  	if err != nil {
   236  		return reconcile.Result{}, errors.Wrap(err, "failed to create machinepool scope")
   237  	}
   238  
   239  	// Always close the scope when exiting this function so we can persist any AzureMachine changes.
   240  	defer func() {
   241  		if err := machinePoolScope.Close(ctx); err != nil && reterr == nil {
   242  			reterr = err
   243  		}
   244  	}()
   245  
   246  	// Return early if the object or Cluster is paused.
   247  	if annotations.IsPaused(cluster, azMachinePool) {
   248  		logger.V(2).Info("AzureMachinePool or linked Cluster is marked as paused. Won't reconcile normally")
   249  		return ampr.reconcilePause(ctx, machinePoolScope)
   250  	}
   251  
   252  	// Handle deleted machine pools
   253  	if !azMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
   254  		return ampr.reconcileDelete(ctx, machinePoolScope, clusterScope)
   255  	}
   256  
   257  	// Handle non-deleted machine pools
   258  	return ampr.reconcileNormal(ctx, machinePoolScope, cluster)
   259  }
   260  
   261  func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) {
   262  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileNormal")
   263  	defer done()
   264  
   265  	log.Info("Reconciling AzureMachinePool")
   266  
   267  	// If the AzureMachine is in an error state, return early.
   268  	if machinePoolScope.AzureMachinePool.Status.FailureReason != nil || machinePoolScope.AzureMachinePool.Status.FailureMessage != nil {
   269  		log.Info("Error state detected, skipping reconciliation")
   270  		return reconcile.Result{}, nil
   271  	}
   272  
   273  	// Register the finalizer immediately to avoid orphaning Azure resources on delete
   274  	needsPatch := controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer)
   275  	needsPatch = machinePoolScope.SetInfrastructureMachineKind() || needsPatch
   276  	// Register the block-move annotation immediately to avoid moving un-paused ASO resources
   277  	needsPatch = infracontroller.AddBlockMoveAnnotation(machinePoolScope.AzureMachinePool) || needsPatch
   278  	if needsPatch {
   279  		if err := machinePoolScope.PatchObject(ctx); err != nil {
   280  			return reconcile.Result{}, err
   281  		}
   282  	}
   283  
   284  	if !cluster.Status.InfrastructureReady {
   285  		log.Info("Cluster infrastructure is not ready yet")
   286  		return reconcile.Result{}, nil
   287  	}
   288  
   289  	// Make sure bootstrap data is available and populated.
   290  	if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil {
   291  		log.Info("Bootstrap data secret reference is not yet available")
   292  		return reconcile.Result{}, nil
   293  	}
   294  
   295  	var reconcileError azure.ReconcileError
   296  
   297  	// Initialize the cache to be used by the AzureMachine services.
   298  	err := machinePoolScope.InitMachinePoolCache(ctx)
   299  	if err != nil {
   300  		if errors.As(err, &reconcileError) && reconcileError.IsTerminal() {
   301  			ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "SKUNotFound", errors.Wrap(err, "failed to initialize machinepool cache").Error())
   302  			log.Error(err, "Failed to initialize machinepool cache")
   303  			machinePoolScope.SetFailureReason(capierrors.InvalidConfigurationMachineError)
   304  			machinePoolScope.SetFailureMessage(err)
   305  			machinePoolScope.SetNotReady()
   306  			return reconcile.Result{}, nil
   307  		}
   308  		return reconcile.Result{}, errors.Wrap(err, "failed to init machinepool scope cache")
   309  	}
   310  
   311  	ams, err := ampr.createAzureMachinePoolService(machinePoolScope)
   312  	if err != nil {
   313  		return reconcile.Result{}, errors.Wrap(err, "failed creating a newAzureMachinePoolService")
   314  	}
   315  
   316  	if err := ams.Reconcile(ctx); err != nil {
   317  		// Handle transient and terminal errors
   318  		var reconcileError azure.ReconcileError
   319  		if errors.As(err, &reconcileError) {
   320  			if reconcileError.IsTerminal() {
   321  				log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name())
   322  				return reconcile.Result{}, nil
   323  			}
   324  
   325  			if reconcileError.IsTransient() {
   326  				log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name())
   327  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   328  			}
   329  
   330  			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePool")
   331  		}
   332  
   333  		return reconcile.Result{}, err
   334  	}
   335  
   336  	log.V(2).Info("Scale Set reconciled", "id",
   337  		machinePoolScope.ProviderID(), "state", machinePoolScope.ProvisioningState())
   338  
   339  	switch machinePoolScope.ProvisioningState() {
   340  	case infrav1.Deleting:
   341  		log.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID())
   342  		ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion")
   343  	case infrav1.Failed:
   344  		log.Info("Unexpected scale set failure", "id", machinePoolScope.ProviderID())
   345  		ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMFailure", "Unexpected Azure scale set failure")
   346  	}
   347  
   348  	if machinePoolScope.NeedsRequeue() {
   349  		return reconcile.Result{
   350  			RequeueAfter: 30 * time.Second,
   351  		}, nil
   352  	}
   353  
   354  	return reconcile.Result{}, nil
   355  }
   356  
   357  //nolint:unparam // Always returns an empty struct for reconcile.Result
   358  func (ampr *AzureMachinePoolReconciler) reconcilePause(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (reconcile.Result, error) {
   359  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcilePause")
   360  	defer done()
   361  
   362  	log.Info("Reconciling AzureMachinePool pause")
   363  
   364  	amps, err := ampr.createAzureMachinePoolService(machinePoolScope)
   365  	if err != nil {
   366  		return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService")
   367  	}
   368  
   369  	if err := amps.Pause(ctx); err != nil {
   370  		return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
   371  	}
   372  	infracontroller.RemoveBlockMoveAnnotation(machinePoolScope.AzureMachinePool)
   373  
   374  	return reconcile.Result{}, nil
   375  }
   376  
   377  //nolint:unparam // Always returns an empty struct for reconcile.Result
   378  func (ampr *AzureMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope infracontroller.ClusterScoper) (reconcile.Result, error) {
   379  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileDelete")
   380  	defer done()
   381  
   382  	log.V(2).Info("handling deleted AzureMachinePool")
   383  
   384  	if infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) {
   385  		amps, err := ampr.createAzureMachinePoolService(machinePoolScope)
   386  		if err != nil {
   387  			return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService")
   388  		}
   389  
   390  		log.V(4).Info("deleting AzureMachinePool resource individually")
   391  		if err := amps.Delete(ctx); err != nil {
   392  			return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
   393  		}
   394  	}
   395  
   396  	// Block deletion until all AzureMachinePoolMachines are finished deleting.
   397  	ampms, err := machinePoolScope.GetMachinePoolMachines(ctx)
   398  	if err != nil {
   399  		return reconcile.Result{}, errors.Wrapf(err, "error finding AzureMachinePoolMachines while deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
   400  	}
   401  
   402  	if len(ampms) > 0 {
   403  		log.Info("AzureMachinePool still has dependent AzureMachinePoolMachines, deleting them first and requeing", "count", len(ampms))
   404  
   405  		var errs []error
   406  
   407  		for _, ampm := range ampms {
   408  			if !ampm.GetDeletionTimestamp().IsZero() {
   409  				// Don't handle deleted child
   410  				continue
   411  			}
   412  
   413  			if err := machinePoolScope.DeleteMachine(ctx, ampm); err != nil {
   414  				err = errors.Wrapf(err, "error deleting AzureMachinePool %s/%s: failed to delete %s %s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.AzureMachinePool.Name, ampm.Namespace, ampm.Name)
   415  				log.Error(err, "Error deleting AzureMachinePoolMachine", "namespace", ampm.Namespace, "name", ampm.Name)
   416  				errs = append(errs, err)
   417  			}
   418  		}
   419  
   420  		if len(errs) > 0 {
   421  			return ctrl.Result{}, kerrors.NewAggregate(errs)
   422  		}
   423  
   424  		return reconcile.Result{}, nil
   425  	}
   426  
   427  	// Delete succeeded, remove finalizer
   428  	log.V(4).Info("removing finalizer for AzureMachinePool")
   429  	controllerutil.RemoveFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer)
   430  	return reconcile.Result{}, nil
   431  }