sigs.k8s.io/cluster-api-provider-azure@v1.17.0/exp/controllers/azuremachinepool_controller.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"
	"reflect"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/record"
	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/azure"
	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
	infracontroller "sigs.k8s.io/cluster-api-provider-azure/controllers"
	infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
	capierrors "sigs.k8s.io/cluster-api/errors"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/predicates"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

type (
	// AzureMachinePoolReconciler reconciles an AzureMachinePool object.
	AzureMachinePoolReconciler struct {
		client.Client
		Scheme                        *runtime.Scheme
		Recorder                      record.EventRecorder
		Timeouts                      reconciler.Timeouts
		WatchFilterValue              string
		createAzureMachinePoolService azureMachinePoolServiceCreator
		BootstrapConfigGVK            schema.GroupVersionKind
	}

	// annotationReaderWriter provides an interface to read and write annotations.
	annotationReaderWriter interface {
		GetAnnotations() map[string]string
		SetAnnotations(annotations map[string]string)
	}
)
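
// azureMachinePoolServiceCreator builds the azureMachinePoolService that
// reconciles the Azure resources backing a machine pool. It is held as a
// field on AzureMachinePoolReconciler so that alternative implementations
// (for example test fakes) can be injected.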
type azureMachinePoolServiceCreator func(machinePoolScope *scope.MachinePoolScope) (*azureMachinePoolService, error)

// NewAzureMachinePoolReconciler returns a new AzureMachinePoolReconciler instance.
func NewAzureMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue, bootstrapConfigGVK string) *AzureMachinePoolReconciler {
	gvk := schema.FromAPIVersionAndKind(kubeadmv1.GroupVersion.String(), reflect.TypeOf((*kubeadmv1.KubeadmConfig)(nil)).Elem().Name())
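	// bootstrapConfigGVK is expected in "Kind.version.group" form (for
	// example "KubeadmConfig.v1beta1.bootstrap.cluster.x-k8s.io"); if it
	// cannot be parsed, the KubeadmConfig default above is used.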
	userGVK, _ := schema.ParseKindArg(bootstrapConfigGVK)

	if userGVK != nil {
		gvk = *userGVK
	}

	ampr := &AzureMachinePoolReconciler{
		Client:             client,
		Recorder:           recorder,
		Timeouts:           timeouts,
		WatchFilterValue:   watchFilterValue,
		BootstrapConfigGVK: gvk,
	}

	ampr.createAzureMachinePoolService = newAzureMachinePoolService

	return ampr
}

// SetupWithManager initializes this controller with a manager.
func (ampr *AzureMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options infracontroller.Options) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx,
		"controllers.AzureMachinePoolReconciler.SetupWithManager",
		tele.KVP("controller", "AzureMachinePool"),
	)
	defer done()

	var r reconcile.Reconciler = ampr
	if options.Cache != nil {
		r = coalescing.NewReconciler(ampr, options.Cache, log)
	}

	// create mappers to transform incoming AzureClusters and AzureManagedControlPlanes into AzureMachinePool requests
	azureClusterMapper, err := AzureClusterToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log)
	if err != nil {
		return errors.Wrapf(err, "failed to create AzureCluster to AzureMachinePools mapper")
	}
	azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureMachinePoolsMapper(ctx, ampr.Client, mgr.GetScheme(), log)
	if err != nil {
		return errors.Wrapf(err, "failed to create AzureManagedControlPlane to AzureMachinePools mapper")
	}

	azureMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ampr.Client, &infrav1exp.AzureMachinePoolList{}, mgr.GetScheme())
	if err != nil {
		return errors.Wrap(err, "failed to create mapper for Cluster to AzureMachinePools")
	}
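
	// Watch the bootstrap config as metadata only (PartialObjectMetadata) so
	// the informer caches just ObjectMeta for these objects rather than full
	// copies.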
	config := &metav1.PartialObjectMetadata{}
	config.SetGroupVersionKind(ampr.BootstrapConfigGVK)
	return ctrl.NewControllerManagedBy(mgr).
		WithOptions(options.Options).
		For(&infrav1exp.AzureMachinePool{}).
		WithEventFilter(predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue)).
		// watch for changes in CAPI MachinePool resources
		Watches(
			&expv1.MachinePool{},
			handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind(infrav1.AzureMachinePoolKind), log)),
		).
		// watch for changes in AzureCluster resources
		Watches(
			&infrav1.AzureCluster{},
			handler.EnqueueRequestsFromMapFunc(azureClusterMapper),
		).
		// watch for changes in AzureManagedControlPlane resources
		Watches(
			&infrav1.AzureManagedControlPlane{},
			handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper),
		).
		// watch for changes in KubeadmConfig (or any BootstrapConfig) to sync bootstrap token
		Watches(
			config,
			handler.EnqueueRequestsFromMapFunc(BootstrapConfigToInfrastructureMapFunc(ctx, ampr.Client, log)),
			builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}),
		).
		Watches(
			&infrav1exp.AzureMachinePoolMachine{},
			handler.EnqueueRequestsFromMapFunc(AzureMachinePoolMachineMapper(mgr.GetScheme(), log)),
			builder.WithPredicates(
				MachinePoolMachineHasStateOrVersionChange(log),
				predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue),
			),
		).
		// Add a watch on clusterv1.Cluster object for unpause & ready notifications.
		Watches(
			&clusterv1.Cluster{},
			handler.EnqueueRequestsFromMapFunc(azureMachinePoolMapper),
			builder.WithPredicates(
				infracontroller.ClusterPauseChangeAndInfrastructureReady(log),
				predicates.ResourceHasFilterLabel(log, ampr.WatchFilterValue),
			),
		).
		Complete(r)
}
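
// The reconciler is wired up by the manager entrypoint. A minimal sketch,
// assuming mgr, opts, and the flag-derived values come from main.go:
//
//	ampr := NewAzureMachinePoolReconciler(
//		mgr.GetClient(),
//		mgr.GetEventRecorderFor("azuremachinepool-reconciler"),
//		timeouts,
//		watchFilterValue,
//		bootstrapConfigGVK,
//	)
//	if err := ampr.SetupWithManager(ctx, mgr, opts); err != nil {
//		// handle controller setup failure
//	}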

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=kubeadmconfigs;kubeadmconfigs/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch

// Reconcile idempotently gets, creates, and updates a machine pool.
func (ampr *AzureMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	ctx, logger, done := tele.StartSpanWithLogger(
		ctx,
		"controllers.AzureMachinePoolReconciler.Reconcile",
		tele.KVP("namespace", req.Namespace),
		tele.KVP("name", req.Name),
		tele.KVP("kind", infrav1.AzureMachinePoolKind),
	)
	defer done()
	ctx, cancel := context.WithTimeout(ctx, ampr.Timeouts.DefaultedLoopTimeout())
	defer cancel()

	logger = logger.WithValues("namespace", req.Namespace, "azureMachinePool", req.Name)

	azMachinePool := &infrav1exp.AzureMachinePool{}
	err := ampr.Get(ctx, req.NamespacedName, azMachinePool)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, err
	}

	// Fetch the CAPI MachinePool.
	machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampr.Client, azMachinePool.ObjectMeta)
	if err != nil {
		return reconcile.Result{}, err
	}
	if machinePool == nil {
		logger.V(2).Info("MachinePool Controller has not yet set OwnerRef")
		return reconcile.Result{}, nil
	}

	logger = logger.WithValues("machinePool", machinePool.Name)

	// Fetch the Cluster.
	cluster, err := util.GetClusterFromMetadata(ctx, ampr.Client, machinePool.ObjectMeta)
	if err != nil {
		logger.V(2).Info("MachinePool is missing cluster label or cluster does not exist")
		return reconcile.Result{}, nil
	}

	logger = logger.WithValues("cluster", cluster.Name)

	clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampr.Client, cluster, ampr.Timeouts)
	if err != nil {
		return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name)
	}

	// Create the machine pool scope
	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
		Client:           ampr.Client,
		MachinePool:      machinePool,
		AzureMachinePool: azMachinePool,
		ClusterScope:     clusterScope,
	})
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create machinepool scope")
	}

	// Always close the scope when exiting this function so we can persist any AzureMachinePool changes.
	defer func() {
		if err := machinePoolScope.Close(ctx); err != nil && reterr == nil {
			reterr = err
		}
	}()

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, azMachinePool) {
		logger.V(2).Info("AzureMachinePool or linked Cluster is marked as paused. Won't reconcile normally")
		return ampr.reconcilePause(ctx, machinePoolScope)
	}

	// Handle deleted machine pools
	if !azMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
		return ampr.reconcileDelete(ctx, machinePoolScope, clusterScope)
	}

	// Handle non-deleted machine pools
	return ampr.reconcileNormal(ctx, machinePoolScope, cluster)
}

func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileNormal")
	defer done()

	log.Info("Reconciling AzureMachinePool")

	// If the AzureMachinePool is in an error state, return early.
	if machinePoolScope.AzureMachinePool.Status.FailureReason != nil || machinePoolScope.AzureMachinePool.Status.FailureMessage != nil {
		log.Info("Error state detected, skipping reconciliation")
		return reconcile.Result{}, nil
	}

	// Register the finalizer immediately to avoid orphaning Azure resources on delete
	needsPatch := controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer)
	needsPatch = machinePoolScope.SetInfrastructureMachineKind() || needsPatch
	// Register the block-move annotation immediately to avoid moving un-paused ASO resources
	needsPatch = infracontroller.AddBlockMoveAnnotation(machinePoolScope.AzureMachinePool) || needsPatch
	if needsPatch {
		if err := machinePoolScope.PatchObject(ctx); err != nil {
			return reconcile.Result{}, err
		}
	}

	if !cluster.Status.InfrastructureReady {
		log.Info("Cluster infrastructure is not ready yet")
		return reconcile.Result{}, nil
	}

	// Make sure bootstrap data is available and populated.
	if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil {
		log.Info("Bootstrap data secret reference is not yet available")
		return reconcile.Result{}, nil
	}

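	// Errors from the Azure services are classified as terminal (record the
	// failure on the AzureMachinePool and stop retrying) or transient
	// (requeue and retry after the returned delay).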
	var reconcileError azure.ReconcileError

	// Initialize the cache to be used by the machine pool services.
	err := machinePoolScope.InitMachinePoolCache(ctx)
	if err != nil {
		if errors.As(err, &reconcileError) && reconcileError.IsTerminal() {
			ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "SKUNotFound", errors.Wrap(err, "failed to initialize machinepool cache").Error())
			log.Error(err, "Failed to initialize machinepool cache")
			machinePoolScope.SetFailureReason(capierrors.InvalidConfigurationMachineError)
			machinePoolScope.SetFailureMessage(err)
			machinePoolScope.SetNotReady()
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, errors.Wrap(err, "failed to init machinepool scope cache")
	}

	ams, err := ampr.createAzureMachinePoolService(machinePoolScope)
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService")
	}

	if err := ams.Reconcile(ctx); err != nil {
		// Handle transient and terminal errors
		var reconcileError azure.ReconcileError
		if errors.As(err, &reconcileError) {
			if reconcileError.IsTerminal() {
				log.Error(err, "failed to reconcile AzureMachinePool", "name", machinePoolScope.Name())
				return reconcile.Result{}, nil
			}

			if reconcileError.IsTransient() {
				if azure.IsOperationNotDoneError(reconcileError) {
					log.V(2).Info(fmt.Sprintf("AzureMachinePool reconcile not done: %s", reconcileError.Error()))
				} else {
					log.V(2).Info(fmt.Sprintf("transient failure to reconcile AzureMachinePool, retrying: %s", reconcileError.Error()))
				}
				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
			}

			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureMachinePool")
		}

		return reconcile.Result{}, err
	}

	log.V(2).Info("Scale Set reconciled", "id",
		machinePoolScope.ProviderID(), "state", machinePoolScope.ProvisioningState())

	switch machinePoolScope.ProvisioningState() {
	case infrav1.Deleting:
		log.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID())
		ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion")
	case infrav1.Failed:
		log.Info("Unexpected scale set failure", "id", machinePoolScope.ProviderID())
		ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMFailure", "Unexpected Azure scale set failure")
	}

	if machinePoolScope.NeedsRequeue() {
		return reconcile.Result{
			RequeueAfter: 30 * time.Second,
		}, nil
	}

	return reconcile.Result{}, nil
}

//nolint:unparam // Always returns an empty struct for reconcile.Result
func (ampr *AzureMachinePoolReconciler) reconcilePause(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (reconcile.Result, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcilePause")
	defer done()

	log.Info("Reconciling AzureMachinePool pause")

	amps, err := ampr.createAzureMachinePoolService(machinePoolScope)
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService")
	}

	if err := amps.Pause(ctx); err != nil {
		return reconcile.Result{}, errors.Wrapf(err, "error pausing AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
	}
	infracontroller.RemoveBlockMoveAnnotation(machinePoolScope.AzureMachinePool)

	return reconcile.Result{}, nil
}

//nolint:unparam // Always returns an empty struct for reconcile.Result
func (ampr *AzureMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope infracontroller.ClusterScoper) (reconcile.Result, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolReconciler.reconcileDelete")
	defer done()

	log.V(2).Info("handling deleted AzureMachinePool")

	if infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) {
		amps, err := ampr.createAzureMachinePoolService(machinePoolScope)
		if err != nil {
			return reconcile.Result{}, errors.Wrap(err, "failed creating a new AzureMachinePoolService")
		}

		log.V(4).Info("deleting AzureMachinePool resource individually")
		if err := amps.Delete(ctx); err != nil {
			return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
		}
	}

	// Block deletion until all AzureMachinePoolMachines are finished deleting.
	ampms, err := machinePoolScope.GetMachinePoolMachines(ctx)
	if err != nil {
		return reconcile.Result{}, errors.Wrapf(err, "error finding AzureMachinePoolMachines while deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name())
	}

	if len(ampms) > 0 {
		log.Info("AzureMachinePool still has dependent AzureMachinePoolMachines, deleting them first and requeueing", "count", len(ampms))

		var errs []error

		for _, ampm := range ampms {
			if !ampm.GetDeletionTimestamp().IsZero() {
				// Don't handle deleted child
				continue
			}

			if err := machinePoolScope.DeleteMachine(ctx, ampm); err != nil {
				err = errors.Wrapf(err, "error deleting AzureMachinePool %s/%s: failed to delete %s %s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.AzureMachinePool.Name, ampm.Namespace, ampm.Name)
				log.Error(err, "Error deleting AzureMachinePoolMachine", "namespace", ampm.Namespace, "name", ampm.Name)
				errs = append(errs, err)
			}
		}

		if len(errs) > 0 {
			return reconcile.Result{}, kerrors.NewAggregate(errs)
		}

		return reconcile.Result{}, nil
	}

	// Delete succeeded, remove finalizer
	log.V(4).Info("removing finalizer for AzureMachinePool")
	controllerutil.RemoveFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer)
	return reconcile.Result{}, nil
}