sigs.k8s.io/cluster-api-provider-azure@v1.17.0/controllers/azuremanagedmachinepool_controller.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	"k8s.io/client-go/tools/record"
    27  	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api-provider-azure/azure"
    29  	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
    30  	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
    31  	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
    32  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    33  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    34  	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    35  	"sigs.k8s.io/cluster-api/util"
    36  	"sigs.k8s.io/cluster-api/util/annotations"
    37  	"sigs.k8s.io/cluster-api/util/conditions"
    38  	"sigs.k8s.io/cluster-api/util/predicates"
    39  	ctrl "sigs.k8s.io/controller-runtime"
    40  	"sigs.k8s.io/controller-runtime/pkg/builder"
    41  	"sigs.k8s.io/controller-runtime/pkg/client"
    42  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    43  	"sigs.k8s.io/controller-runtime/pkg/handler"
    44  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    45  )
    46  
    47  // AzureManagedMachinePoolReconciler reconciles an AzureManagedMachinePool object.
    48  type AzureManagedMachinePoolReconciler struct {
    49  	client.Client
    50  	Recorder                             record.EventRecorder
    51  	Timeouts                             reconciler.Timeouts
    52  	WatchFilterValue                     string
    53  	createAzureManagedMachinePoolService azureManagedMachinePoolServiceCreator
    54  }
    55  
    56  type azureManagedMachinePoolServiceCreator func(managedMachinePoolScope *scope.ManagedMachinePoolScope, apiCallTimeout time.Duration) (*azureManagedMachinePoolService, error)
    57  
    58  // NewAzureManagedMachinePoolReconciler returns a new AzureManagedMachinePoolReconciler instance.
    59  func NewAzureManagedMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureManagedMachinePoolReconciler {
    60  	ampr := &AzureManagedMachinePoolReconciler{
    61  		Client:           client,
    62  		Recorder:         recorder,
    63  		Timeouts:         timeouts,
    64  		WatchFilterValue: watchFilterValue,
    65  	}
    66  
    67  	ampr.createAzureManagedMachinePoolService = newAzureManagedMachinePoolService
    68  
    69  	return ampr
    70  }
    71  
    72  // SetupWithManager initializes this controller with a manager.
    73  func (ammpr *AzureManagedMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error {
    74  	ctx, log, done := tele.StartSpanWithLogger(ctx,
    75  		"controllers.AzureManagedMachinePoolReconciler.SetupWithManager",
    76  		tele.KVP("controller", "AzureManagedMachinePool"),
    77  	)
    78  	defer done()
    79  
    80  	var r reconcile.Reconciler = ammpr
    81  	if options.Cache != nil {
    82  		r = coalescing.NewReconciler(ammpr, options.Cache, log)
    83  	}
    84  
    85  	azManagedMachinePool := &infrav1.AzureManagedMachinePool{}
    86  	// create mapper to transform incoming AzureManagedControlPlanes into AzureManagedMachinePool requests
    87  	azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureManagedMachinePoolsMapper(ctx, ammpr.Client, mgr.GetScheme(), log)
    88  	if err != nil {
    89  		return errors.Wrap(err, "failed to create AzureManagedControlPlane to AzureManagedMachinePools mapper")
    90  	}
    91  
    92  	azureManagedMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ammpr.Client, &infrav1.AzureManagedMachinePoolList{}, mgr.GetScheme())
    93  	if err != nil {
    94  		return errors.Wrap(err, "failed to create mapper for Cluster to AzureManagedMachinePools")
    95  	}
    96  
    97  	return ctrl.NewControllerManagedBy(mgr).
    98  		WithOptions(options.Options).
    99  		For(azManagedMachinePool).
   100  		WithEventFilter(predicates.ResourceHasFilterLabel(log, ammpr.WatchFilterValue)).
   101  		// watch for changes in CAPI MachinePool resources
   102  		Watches(
   103  			&expv1.MachinePool{},
   104  			handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1.GroupVersion.WithKind("AzureManagedMachinePool"), log)),
   105  		).
   106  		// watch for changes in AzureManagedControlPlanes
   107  		Watches(
   108  			&infrav1.AzureManagedControlPlane{},
   109  			handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper),
   110  		).
   111  		// Add a watch on clusterv1.Cluster object for pause/unpause & ready notifications.
   112  		Watches(
   113  			&clusterv1.Cluster{},
   114  			handler.EnqueueRequestsFromMapFunc(azureManagedMachinePoolMapper),
   115  			builder.WithPredicates(
   116  				ClusterPauseChangeAndInfrastructureReady(log),
   117  				predicates.ResourceHasFilterLabel(log, ammpr.WatchFilterValue),
   118  			),
   119  		).
   120  		Complete(r)
   121  }
   122  
   123  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremanagedmachinepools,verbs=get;list;watch;create;update;patch;delete
   124  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremanagedmachinepools/status,verbs=get;update;patch
   125  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;patch
   126  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch
   127  // +kubebuilder:rbac:groups=containerservice.azure.com,resources=managedclustersagentpools,verbs=get;list;watch;create;update;patch;delete
   128  // +kubebuilder:rbac:groups=containerservice.azure.com,resources=managedclustersagentpools/status,verbs=get;list;watch
   129  
   130  // Reconcile idempotently gets, creates, and updates a machine pool.
   131  func (ammpr *AzureManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   132  	ctx, cancel := context.WithTimeout(ctx, ammpr.Timeouts.DefaultedLoopTimeout())
   133  	defer cancel()
   134  
   135  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.Reconcile",
   136  		tele.KVP("namespace", req.Namespace),
   137  		tele.KVP("name", req.Name),
   138  		tele.KVP("kind", "AzureManagedMachinePool"),
   139  	)
   140  	defer done()
   141  
   142  	// Fetch the AzureManagedMachinePool instance
   143  	infraPool := &infrav1.AzureManagedMachinePool{}
   144  	err := ammpr.Get(ctx, req.NamespacedName, infraPool)
   145  	if err != nil {
   146  		if apierrors.IsNotFound(err) {
   147  			return reconcile.Result{}, nil
   148  		}
   149  		return reconcile.Result{}, err
   150  	}
   151  
   152  	// Fetch the owning MachinePool.
   153  	ownerPool, err := GetOwnerMachinePool(ctx, ammpr.Client, infraPool.ObjectMeta)
   154  	if err != nil {
   155  		return reconcile.Result{}, err
   156  	}
   157  	if ownerPool == nil {
   158  		log.Info("MachinePool Controller has not yet set OwnerRef")
   159  		return reconcile.Result{}, nil
   160  	}
   161  
   162  	// Fetch the Cluster.
   163  	ownerCluster, err := util.GetOwnerCluster(ctx, ammpr.Client, ownerPool.ObjectMeta)
   164  	if err != nil {
   165  		return reconcile.Result{}, err
   166  	}
   167  	if ownerCluster == nil {
   168  		log.Info("Cluster Controller has not yet set OwnerRef")
   169  		return reconcile.Result{}, nil
   170  	}
   171  
   172  	log = log.WithValues("ownerCluster", ownerCluster.Name)
   173  
   174  	// Fetch the corresponding control plane which has all the interesting data.
   175  	controlPlane := &infrav1.AzureManagedControlPlane{}
   176  	controlPlaneName := client.ObjectKey{
   177  		Namespace: ownerCluster.Spec.ControlPlaneRef.Namespace,
   178  		Name:      ownerCluster.Spec.ControlPlaneRef.Name,
   179  	}
   180  	if err := ammpr.Client.Get(ctx, controlPlaneName, controlPlane); err != nil {
   181  		return reconcile.Result{}, err
   182  	}
   183  
   184  	// Upon first create of an AKS service, the node pools are provided to the CreateOrUpdate call. After the initial
   185  	// create of the control plane and node pools, the control plane will transition to initialized. After the control
   186  	// plane is initialized, we can proceed to reconcile managed machine pools.
   187  	if !controlPlane.Status.Initialized {
   188  		log.Info("AzureManagedControlPlane is not initialized")
   189  		return reconcile.Result{}, nil
   190  	}
   191  
   192  	// create the managed control plane scope
   193  	managedControlPlaneScope, err := scope.NewManagedControlPlaneScope(ctx, scope.ManagedControlPlaneScopeParams{
   194  		Client:       ammpr.Client,
   195  		ControlPlane: controlPlane,
   196  		Cluster:      ownerCluster,
   197  		Timeouts:     ammpr.Timeouts,
   198  	})
   199  	if err != nil {
   200  		return reconcile.Result{}, errors.Wrap(err, "failed to create ManagedControlPlane scope")
   201  	}
   202  
   203  	// Create the scope.
   204  	mcpScope, err := scope.NewManagedMachinePoolScope(ctx, scope.ManagedMachinePoolScopeParams{
   205  		Client:       ammpr.Client,
   206  		ControlPlane: controlPlane,
   207  		Cluster:      ownerCluster,
   208  		ManagedMachinePool: scope.ManagedMachinePool{
   209  			MachinePool:      ownerPool,
   210  			InfraMachinePool: infraPool,
   211  		},
   212  		ManagedControlPlaneScope: managedControlPlaneScope,
   213  	})
   214  	if err != nil {
   215  		return reconcile.Result{}, errors.Wrap(err, "failed to create ManagedMachinePool scope")
   216  	}
   217  
   218  	// Always patch when exiting so we can persist changes to finalizers and status
   219  	defer func() {
   220  		if err := mcpScope.PatchObject(ctx); err != nil && reterr == nil {
   221  			reterr = err
   222  		}
   223  		if err := mcpScope.PatchCAPIMachinePoolObject(ctx); err != nil && reterr == nil {
   224  			reterr = err
   225  		}
   226  	}()
   227  
   228  	// Return early if the object or Cluster is paused.
   229  	if annotations.IsPaused(ownerCluster, infraPool) {
   230  		log.Info("AzureManagedMachinePool or linked Cluster is marked as paused. Won't reconcile normally")
   231  		return ammpr.reconcilePause(ctx, mcpScope)
   232  	}
   233  
   234  	// Handle deleted clusters
   235  	if !infraPool.DeletionTimestamp.IsZero() {
   236  		return ammpr.reconcileDelete(ctx, mcpScope)
   237  	}
   238  
   239  	// Handle non-deleted clusters
   240  	return ammpr.reconcileNormal(ctx, mcpScope)
   241  }
   242  
   243  func (ammpr *AzureManagedMachinePoolReconciler) reconcileNormal(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
   244  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.reconcileNormal")
   245  	defer done()
   246  
   247  	log.Info("Reconciling AzureManagedMachinePool")
   248  
   249  	// Register the finalizer immediately to avoid orphaning Azure resources on delete
   250  	needsPatch := controllerutil.AddFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
   251  	// Register the block-move annotation immediately to avoid moving un-paused ASO resources
   252  	needsPatch = AddBlockMoveAnnotation(scope.InfraMachinePool) || needsPatch
   253  	if needsPatch {
   254  		if err := scope.PatchObject(ctx); err != nil {
   255  			return reconcile.Result{}, err
   256  		}
   257  	}
   258  
   259  	svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
   260  	if err != nil {
   261  		return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManageMachinePoolService")
   262  	}
   263  
   264  	if err := svc.Reconcile(ctx); err != nil {
   265  		scope.SetAgentPoolReady(false)
   266  		// Ensure the ready condition is false, but do not overwrite an existing
   267  		// error condition which might provide more details.
   268  		if conditions.IsTrue(scope.InfraMachinePool, infrav1.AgentPoolsReadyCondition) {
   269  			conditions.MarkFalse(scope.InfraMachinePool, infrav1.AgentPoolsReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, err.Error())
   270  		}
   271  
   272  		// Handle transient and terminal errors
   273  		log := log.WithValues("name", scope.InfraMachinePool.Name, "namespace", scope.InfraMachinePool.Namespace)
   274  		var reconcileError azure.ReconcileError
   275  		if errors.As(err, &reconcileError) {
   276  			if reconcileError.IsTerminal() {
   277  				log.Error(err, "failed to reconcile AzureManagedMachinePool")
   278  				return reconcile.Result{}, nil
   279  			}
   280  
   281  			if reconcileError.IsTransient() {
   282  				log.V(4).Info("requeuing due to transient failure", "error", err)
   283  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   284  			}
   285  
   286  			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureManagedMachinePool")
   287  		}
   288  
   289  		return reconcile.Result{}, errors.Wrapf(err, "error creating AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
   290  	}
   291  
   292  	// No errors, so mark us ready so the Cluster API Cluster Controller can pull it
   293  	scope.SetAgentPoolReady(true)
   294  	return reconcile.Result{}, nil
   295  }
   296  
   297  //nolint:unparam // Always returns an empty struct for reconcile.Result
   298  func (ammpr *AzureManagedMachinePoolReconciler) reconcilePause(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
   299  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePool.reconcilePause")
   300  	defer done()
   301  
   302  	log.Info("Reconciling AzureManagedMachinePool pause")
   303  
   304  	svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
   305  	if err != nil {
   306  		return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManageMachinePoolService")
   307  	}
   308  
   309  	if err := svc.Pause(ctx); err != nil {
   310  		return reconcile.Result{}, errors.Wrapf(err, "error pausing AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
   311  	}
   312  	RemoveBlockMoveAnnotation(scope.InfraMachinePool)
   313  
   314  	return reconcile.Result{}, nil
   315  }
   316  
   317  func (ammpr *AzureManagedMachinePoolReconciler) reconcileDelete(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
   318  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.reconcileDelete")
   319  	defer done()
   320  
   321  	log.Info("Reconciling AzureManagedMachinePool delete")
   322  
   323  	if !scope.Cluster.DeletionTimestamp.IsZero() {
   324  		// Cluster was deleted, skip machine pool deletion and let AKS delete the whole cluster.
   325  		// So, remove the finalizer.
   326  		controllerutil.RemoveFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
   327  	} else {
   328  		svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
   329  		if err != nil {
   330  			return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManageMachinePoolService")
   331  		}
   332  
   333  		if err := svc.Delete(ctx); err != nil {
   334  			// Handle transient errors
   335  			var reconcileError azure.ReconcileError
   336  			if errors.As(err, &reconcileError) && reconcileError.IsTransient() {
   337  				if azure.IsOperationNotDoneError(reconcileError) {
   338  					log.V(2).Info(fmt.Sprintf("AzureManagedMachinePool delete not done: %s", reconcileError.Error()))
   339  				} else {
   340  					log.V(2).Info("transient failure to delete AzureManagedMachinePool, retrying")
   341  				}
   342  				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
   343  			}
   344  			return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
   345  		}
   346  		// Machine pool successfully deleted, remove the finalizer.
   347  		controllerutil.RemoveFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
   348  	}
   349  
   350  	if err := scope.PatchObject(ctx); err != nil {
   351  		return reconcile.Result{}, err
   352  	}
   353  
   354  	return reconcile.Result{}, nil
   355  }