sigs.k8s.io/cluster-api-provider-azure@v1.14.3/controllers/azuremanagedmachinepool_controller.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/client-go/tools/record"
	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-azure/azure"
	"sigs.k8s.io/cluster-api-provider-azure/azure/scope"
	"sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing"
	"sigs.k8s.io/cluster-api-provider-azure/util/reconciler"
	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/predicates"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	"sigs.k8s.io/controller-runtime/pkg/source"
)

// AzureManagedMachinePoolReconciler reconciles an AzureManagedMachinePool object.
type AzureManagedMachinePoolReconciler struct {
	client.Client
	Recorder                             record.EventRecorder
	Timeouts                             reconciler.Timeouts
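	// WatchFilterValue, when non-empty, restricts this controller to objects
	// carrying the cluster.x-k8s.io/watch-filter label with a matching value.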
	WatchFilterValue                     string
	createAzureManagedMachinePoolService azureManagedMachinePoolServiceCreator
}

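// azureManagedMachinePoolServiceCreator builds the azureManagedMachinePoolService
// that performs the Azure-facing work. Keeping the constructor behind a function
// field lets tests substitute a fake service.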
type azureManagedMachinePoolServiceCreator func(managedMachinePoolScope *scope.ManagedMachinePoolScope, apiCallTimeout time.Duration) (*azureManagedMachinePoolService, error)

// NewAzureManagedMachinePoolReconciler returns a new AzureManagedMachinePoolReconciler instance.
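//
// A minimal wiring sketch (hypothetical caller; the recorder name and the empty
// watch-filter value below are illustrative, not required by this package):
//
//	ampr := NewAzureManagedMachinePoolReconciler(
//		mgr.GetClient(),
//		mgr.GetEventRecorderFor("azuremanagedmachinepool-reconciler"),
//		reconciler.Timeouts{},
//		"", // empty filter: reconcile objects regardless of the watch-filter label
//	)
//	if err := ampr.SetupWithManager(ctx, mgr, Options{}); err != nil {
//		// handle setup error
//	}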
func NewAzureManagedMachinePoolReconciler(client client.Client, recorder record.EventRecorder, timeouts reconciler.Timeouts, watchFilterValue string) *AzureManagedMachinePoolReconciler {
	ampr := &AzureManagedMachinePoolReconciler{
		Client:           client,
		Recorder:         recorder,
		Timeouts:         timeouts,
		WatchFilterValue: watchFilterValue,
	}

	ampr.createAzureManagedMachinePoolService = newAzureManagedMachinePoolService

	return ampr
}

// SetupWithManager initializes this controller with a manager.
func (ammpr *AzureManagedMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options Options) error {
	ctx, log, done := tele.StartSpanWithLogger(ctx,
		"controllers.AzureManagedMachinePoolReconciler.SetupWithManager",
		tele.KVP("controller", "AzureManagedMachinePool"),
	)
	defer done()

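	// When a coalescing cache is configured, wrap the reconciler so that
	// back-to-back requests for the same object within the cache window are
	// coalesced into fewer reconcile passes.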
	var r reconcile.Reconciler = ammpr
	if options.Cache != nil {
		r = coalescing.NewReconciler(ammpr, options.Cache, log)
	}

	azManagedMachinePool := &infrav1.AzureManagedMachinePool{}
	// create mapper to transform incoming AzureManagedControlPlanes into AzureManagedMachinePool requests
	azureManagedControlPlaneMapper, err := AzureManagedControlPlaneToAzureManagedMachinePoolsMapper(ctx, ammpr.Client, mgr.GetScheme(), log)
	if err != nil {
		return errors.Wrap(err, "failed to create AzureManagedControlPlane to AzureManagedMachinePools mapper")
	}

	c, err := ctrl.NewControllerManagedBy(mgr).
		WithOptions(options.Options).
		For(azManagedMachinePool).
		WithEventFilter(predicates.ResourceHasFilterLabel(log, ammpr.WatchFilterValue)).
		// watch for changes in CAPI MachinePool resources
		Watches(
			&expv1.MachinePool{},
			handler.EnqueueRequestsFromMapFunc(MachinePoolToInfrastructureMapFunc(infrav1.GroupVersion.WithKind("AzureManagedMachinePool"), log)),
		).
		// watch for changes in AzureManagedControlPlanes
		Watches(
			&infrav1.AzureManagedControlPlane{},
			handler.EnqueueRequestsFromMapFunc(azureManagedControlPlaneMapper),
		).
		Build(r)
	if err != nil {
		return errors.Wrap(err, "error creating controller")
	}

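	// create mapper to requeue every AzureManagedMachinePool belonging to a Cluster
	// whenever that Cluster changes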
	azureManagedMachinePoolMapper, err := util.ClusterToTypedObjectsMapper(ammpr.Client, &infrav1.AzureManagedMachinePoolList{}, mgr.GetScheme())
	if err != nil {
		return errors.Wrap(err, "failed to create mapper for Cluster to AzureManagedMachinePools")
	}

	// Add a watch on clusterv1.Cluster object for pause/unpause & ready notifications.
	if err = c.Watch(
		source.Kind(mgr.GetCache(), &clusterv1.Cluster{}),
		handler.EnqueueRequestsFromMapFunc(azureManagedMachinePoolMapper),
		ClusterPauseChangeAndInfrastructureReady(log),
		predicates.ResourceHasFilterLabel(log, ammpr.WatchFilterValue),
	); err != nil {
		return errors.Wrap(err, "failed adding a watch for ready clusters")
	}

	return nil
}

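// The kubebuilder markers below are compiled into the RBAC rules this controller
// needs: full control of AzureManagedMachinePools and the ASO agent-pool resources
// they map to, plus read (and, for Clusters, patch) access to the owning CAPI objects.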
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremanagedmachinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremanagedmachinepools/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch
// +kubebuilder:rbac:groups=containerservice.azure.com,resources=managedclustersagentpools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=containerservice.azure.com,resources=managedclustersagentpools/status,verbs=get;list;watch

// Reconcile idempotently gets, creates, and updates a machine pool.
func (ammpr *AzureManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
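	// Bound the entire reconcile pass by the configured loop timeout so a slow
	// Azure call cannot stall the controller indefinitely.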
	ctx, cancel := context.WithTimeout(ctx, ammpr.Timeouts.DefaultedLoopTimeout())
	defer cancel()

	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.Reconcile",
		tele.KVP("namespace", req.Namespace),
		tele.KVP("name", req.Name),
		tele.KVP("kind", "AzureManagedMachinePool"),
	)
	defer done()

	// Fetch the AzureManagedMachinePool instance.
	infraPool := &infrav1.AzureManagedMachinePool{}
	err := ammpr.Get(ctx, req.NamespacedName, infraPool)
	if err != nil {
		if apierrors.IsNotFound(err) {
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, err
	}

	// Fetch the owning MachinePool.
	ownerPool, err := GetOwnerMachinePool(ctx, ammpr.Client, infraPool.ObjectMeta)
	if err != nil {
		return reconcile.Result{}, err
	}
	if ownerPool == nil {
		log.Info("MachinePool Controller has not yet set OwnerRef")
		return reconcile.Result{}, nil
	}

	// Fetch the Cluster.
	ownerCluster, err := util.GetOwnerCluster(ctx, ammpr.Client, ownerPool.ObjectMeta)
	if err != nil {
		return reconcile.Result{}, err
	}
	if ownerCluster == nil {
		log.Info("Cluster Controller has not yet set OwnerRef")
		return reconcile.Result{}, nil
	}

	log = log.WithValues("ownerCluster", ownerCluster.Name)

	// Fetch the corresponding control plane, which has all the interesting data.
	controlPlane := &infrav1.AzureManagedControlPlane{}
	controlPlaneName := client.ObjectKey{
		Namespace: ownerCluster.Spec.ControlPlaneRef.Namespace,
		Name:      ownerCluster.Spec.ControlPlaneRef.Name,
	}
	if err := ammpr.Client.Get(ctx, controlPlaneName, controlPlane); err != nil {
		return reconcile.Result{}, err
	}

	// On the first create of an AKS cluster, the node pools are passed to the initial
	// CreateOrUpdate call for the control plane. Once that create completes, the
	// control plane transitions to initialized; only then can managed machine pools
	// be reconciled individually.
	if !controlPlane.Status.Initialized {
		log.Info("AzureManagedControlPlane is not initialized")
		return reconcile.Result{}, nil
	}

	// create the managed control plane scope
	managedControlPlaneScope, err := scope.NewManagedControlPlaneScope(ctx, scope.ManagedControlPlaneScopeParams{
		Client:       ammpr.Client,
		ControlPlane: controlPlane,
		Cluster:      ownerCluster,
		Timeouts:     ammpr.Timeouts,
	})
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create ManagedControlPlane scope")
	}

	// Create the machine pool scope.
	mcpScope, err := scope.NewManagedMachinePoolScope(ctx, scope.ManagedMachinePoolScopeParams{
		Client:       ammpr.Client,
		ControlPlane: controlPlane,
		Cluster:      ownerCluster,
		ManagedMachinePool: scope.ManagedMachinePool{
			MachinePool:      ownerPool,
			InfraMachinePool: infraPool,
		},
		ManagedControlPlaneScope: managedControlPlaneScope,
	})
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create ManagedMachinePool scope")
	}

	// Always patch when exiting so we can persist changes to finalizers and status.
	defer func() {
		if err := mcpScope.PatchObject(ctx); err != nil && reterr == nil {
			reterr = err
		}
		if err := mcpScope.PatchCAPIMachinePoolObject(ctx); err != nil && reterr == nil {
			reterr = err
		}
	}()

	// Return early if the object or its owning Cluster is paused.
	if annotations.IsPaused(ownerCluster, infraPool) {
		log.Info("AzureManagedMachinePool or linked Cluster is marked as paused. Won't reconcile normally")
		return ammpr.reconcilePause(ctx, mcpScope)
	}

	// Handle deleted machine pools.
	if !infraPool.DeletionTimestamp.IsZero() {
		return ammpr.reconcileDelete(ctx, mcpScope)
	}

	// Handle non-deleted machine pools.
	return ammpr.reconcileNormal(ctx, mcpScope)
}


func (ammpr *AzureManagedMachinePoolReconciler) reconcileNormal(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.reconcileNormal")
	defer done()

	log.Info("Reconciling AzureManagedMachinePool")

	// Register the finalizer immediately to avoid orphaning Azure resources on delete.
	needsPatch := controllerutil.AddFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
	// Register the block-move annotation immediately to avoid moving un-paused ASO resources.
	needsPatch = AddBlockMoveAnnotation(scope.InfraMachinePool) || needsPatch
	if needsPatch {
		if err := scope.PatchObject(ctx); err != nil {
			return reconcile.Result{}, err
		}
	}

	svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManagedMachinePoolService")
	}

	if err := svc.Reconcile(ctx); err != nil {
		scope.SetAgentPoolReady(false)
		// Ensure the ready condition is false, but do not overwrite an existing
		// error condition which might provide more details.
		if conditions.IsTrue(scope.InfraMachinePool, infrav1.AgentPoolsReadyCondition) {
			conditions.MarkFalse(scope.InfraMachinePool, infrav1.AgentPoolsReadyCondition, infrav1.FailedReason, clusterv1.ConditionSeverityError, err.Error())
		}

		// Handle transient and terminal errors.
		log := log.WithValues("name", scope.InfraMachinePool.Name, "namespace", scope.InfraMachinePool.Namespace)
		var reconcileError azure.ReconcileError
		if errors.As(err, &reconcileError) {
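			// Terminal errors cannot be fixed by retrying, so log them and return
			// a nil error to avoid requeuing with backoff; transient errors carry
			// their own retry hint and are requeued after the suggested delay.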
			if reconcileError.IsTerminal() {
				log.Error(err, "failed to reconcile AzureManagedMachinePool")
				return reconcile.Result{}, nil
			}

			if reconcileError.IsTransient() {
				log.V(4).Info("requeuing due to transient failure", "error", err)
				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
			}

			return reconcile.Result{}, errors.Wrap(err, "failed to reconcile AzureManagedMachinePool")
		}

		return reconcile.Result{}, errors.Wrapf(err, "error creating AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
	}

	// No errors, so mark the pool ready so the Cluster API machine pool controller can pick it up.
	scope.SetAgentPoolReady(true)
	return reconcile.Result{}, nil
}

//nolint:unparam // Always returns an empty struct for reconcile.Result
func (ammpr *AzureManagedMachinePoolReconciler) reconcilePause(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.reconcilePause")
	defer done()

	log.Info("Reconciling AzureManagedMachinePool pause")

	svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
	if err != nil {
		return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManagedMachinePoolService")
	}

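	// Pause the underlying Azure resources; in practice this marks the backing
	// ASO resources so they are not reconciled while the cluster is paused.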
	if err := svc.Pause(ctx); err != nil {
		return reconcile.Result{}, errors.Wrapf(err, "error pausing AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
	}
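	// With the ASO resources paused, the object is safe to move, so drop the
	// block-move annotation that was guarding against moving un-paused resources.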
	RemoveBlockMoveAnnotation(scope.InfraMachinePool)

	return reconcile.Result{}, nil
}

func (ammpr *AzureManagedMachinePoolReconciler) reconcileDelete(ctx context.Context, scope *scope.ManagedMachinePoolScope) (reconcile.Result, error) {
	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureManagedMachinePoolReconciler.reconcileDelete")
	defer done()

	log.Info("Reconciling AzureManagedMachinePool delete")

	if !scope.Cluster.DeletionTimestamp.IsZero() {
		// The owning Cluster is being deleted, so AKS will delete the whole managed
		// cluster, including this node pool. Skip the individual pool deletion and
		// just remove the finalizer.
		controllerutil.RemoveFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
	} else {
		svc, err := ammpr.createAzureManagedMachinePoolService(scope, ammpr.Timeouts.DefaultedAzureServiceReconcileTimeout())
		if err != nil {
			return reconcile.Result{}, errors.Wrap(err, "failed to create an AzureManagedMachinePoolService")
		}

		if err := svc.Delete(ctx); err != nil {
			// Handle transient errors: an in-progress delete surfaces as an
			// operation-not-done error, so requeue and check again later.
			var reconcileError azure.ReconcileError
			if errors.As(err, &reconcileError) && reconcileError.IsTransient() {
				if azure.IsOperationNotDoneError(reconcileError) {
					log.V(2).Info(fmt.Sprintf("AzureManagedMachinePool delete not done: %s", reconcileError.Error()))
				} else {
					log.V(2).Info("transient failure to delete AzureManagedMachinePool, retrying")
				}
				return reconcile.Result{RequeueAfter: reconcileError.RequeueAfter()}, nil
			}
			return reconcile.Result{}, errors.Wrapf(err, "error deleting AzureManagedMachinePool %s/%s", scope.InfraMachinePool.Namespace, scope.InfraMachinePool.Name)
		}
		// Machine pool successfully deleted, remove the finalizer.
		controllerutil.RemoveFinalizer(scope.InfraMachinePool, infrav1.ClusterFinalizer)
	}

	if err := scope.PatchObject(ctx); err != nil {
		return reconcile.Result{}, err
	}

	return reconcile.Result{}, nil
}