sigs.k8s.io/cluster-api-provider-azure@v1.17.0/controllers/azureasomanagedmachinepool_controller.go (about)

     1  /*
     2  Copyright 2024 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"slices"
    23  
    24  	asocontainerservicev1 "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20231001"
    25  	corev1 "k8s.io/api/core/v1"
    26  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    27  	"k8s.io/apimachinery/pkg/types"
    28  	"k8s.io/apimachinery/pkg/util/validation"
    29  	"k8s.io/utils/ptr"
    30  	infrav1alpha "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha1"
    31  	"sigs.k8s.io/cluster-api-provider-azure/pkg/mutators"
    32  	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
    33  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    34  	"sigs.k8s.io/cluster-api/controllers/external"
    35  	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    36  	utilexp "sigs.k8s.io/cluster-api/exp/util"
    37  	"sigs.k8s.io/cluster-api/util"
    38  	"sigs.k8s.io/cluster-api/util/annotations"
    39  	"sigs.k8s.io/cluster-api/util/patch"
    40  	"sigs.k8s.io/cluster-api/util/predicates"
    41  	ctrl "sigs.k8s.io/controller-runtime"
    42  	"sigs.k8s.io/controller-runtime/pkg/builder"
    43  	"sigs.k8s.io/controller-runtime/pkg/client"
    44  	"sigs.k8s.io/controller-runtime/pkg/controller"
    45  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    46  	"sigs.k8s.io/controller-runtime/pkg/handler"
    47  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    48  )
    49  
    50  // AzureASOManagedMachinePoolReconciler reconciles a AzureASOManagedMachinePool object.
    51  type AzureASOManagedMachinePoolReconciler struct {
    52  	client.Client
    53  	WatchFilterValue string
    54  	Tracker          ClusterTracker
    55  
    56  	newResourceReconciler func(*infrav1alpha.AzureASOManagedMachinePool, []*unstructured.Unstructured) resourceReconciler
    57  }
    58  
    59  // ClusterTracker wraps a CAPI remote.ClusterCacheTracker.
    60  type ClusterTracker interface {
    61  	GetClient(context.Context, types.NamespacedName) (client.Client, error)
    62  }
    63  
    64  // SetupWithManager sets up the controller with the Manager.
    65  func (r *AzureASOManagedMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    66  	_, log, done := tele.StartSpanWithLogger(ctx,
    67  		"controllers.AzureASOManagedMachinePoolReconciler.SetupWithManager",
    68  		tele.KVP("controller", infrav1alpha.AzureASOManagedMachinePoolKind),
    69  	)
    70  	defer done()
    71  
    72  	clusterToAzureASOManagedMachinePools, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &infrav1alpha.AzureASOManagedMachinePoolList{}, mgr.GetScheme())
    73  	if err != nil {
    74  		return fmt.Errorf("failed to get Cluster to AzureASOManagedMachinePool mapper: %w", err)
    75  	}
    76  
    77  	c, err := ctrl.NewControllerManagedBy(mgr).
    78  		WithOptions(options).
    79  		For(&infrav1alpha.AzureASOManagedMachinePool{}).
    80  		WithEventFilter(predicates.ResourceHasFilterLabel(log, r.WatchFilterValue)).
    81  		Watches(
    82  			&clusterv1.Cluster{},
    83  			handler.EnqueueRequestsFromMapFunc(clusterToAzureASOManagedMachinePools),
    84  			builder.WithPredicates(
    85  				predicates.ResourceHasFilterLabel(log, r.WatchFilterValue),
    86  				predicates.Any(log,
    87  					predicates.ClusterControlPlaneInitialized(log),
    88  					ClusterUpdatePauseChange(log),
    89  				),
    90  			),
    91  		).
    92  		Watches(
    93  			&expv1.MachinePool{},
    94  			handler.EnqueueRequestsFromMapFunc(utilexp.MachinePoolToInfrastructureMapFunc(ctx,
    95  				infrav1alpha.GroupVersion.WithKind(infrav1alpha.AzureASOManagedMachinePoolKind)),
    96  			),
    97  			builder.WithPredicates(
    98  				predicates.ResourceHasFilterLabel(log, r.WatchFilterValue),
    99  			),
   100  		).
   101  		Build(r)
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	externalTracker := &external.ObjectTracker{
   107  		Cache:      mgr.GetCache(),
   108  		Controller: c,
   109  	}
   110  
   111  	r.newResourceReconciler = func(asoManagedCluster *infrav1alpha.AzureASOManagedMachinePool, resources []*unstructured.Unstructured) resourceReconciler {
   112  		return &ResourceReconciler{
   113  			Client:    r.Client,
   114  			resources: resources,
   115  			owner:     asoManagedCluster,
   116  			watcher:   externalTracker,
   117  		}
   118  	}
   119  
   120  	return nil
   121  }
   122  
   123  //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools,verbs=get;list;watch;create;update;patch;delete
   124  //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools/status,verbs=get;update;patch
   125  //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools/finalizers,verbs=update
   126  
   127  // Reconcile reconciles an AzureASOManagedMachinePool.
   128  func (r *AzureASOManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, resultErr error) {
   129  	ctx, log, done := tele.StartSpanWithLogger(ctx,
   130  		"controllers.AzureASOManagedMachinePoolReconciler.Reconcile",
   131  		tele.KVP("namespace", req.Namespace),
   132  		tele.KVP("name", req.Name),
   133  		tele.KVP("kind", infrav1alpha.AzureASOManagedMachinePoolKind),
   134  	)
   135  	defer done()
   136  
   137  	asoManagedMachinePool := &infrav1alpha.AzureASOManagedMachinePool{}
   138  	err := r.Get(ctx, req.NamespacedName, asoManagedMachinePool)
   139  	if err != nil {
   140  		return ctrl.Result{}, client.IgnoreNotFound(err)
   141  	}
   142  
   143  	patchHelper, err := patch.NewHelper(asoManagedMachinePool, r.Client)
   144  	if err != nil {
   145  		return ctrl.Result{}, fmt.Errorf("failed to create patch helper: %w", err)
   146  	}
   147  	defer func() {
   148  		err := patchHelper.Patch(ctx, asoManagedMachinePool)
   149  		if err != nil && resultErr == nil {
   150  			resultErr = err
   151  			result = ctrl.Result{}
   152  		}
   153  	}()
   154  
   155  	asoManagedMachinePool.Status.Ready = false
   156  
   157  	machinePool, err := utilexp.GetOwnerMachinePool(ctx, r.Client, asoManagedMachinePool.ObjectMeta)
   158  	if err != nil {
   159  		return ctrl.Result{}, err
   160  	}
   161  	if machinePool == nil {
   162  		log.V(4).Info("Waiting for MachinePool Controller to set OwnerRef on AzureASOManagedMachinePool")
   163  		return ctrl.Result{}, nil
   164  	}
   165  
   166  	machinePoolBefore := machinePool.DeepCopy()
   167  	defer func() {
   168  		// Skip using a patch helper here because we will never modify the MachinePool status.
   169  		err := r.Patch(ctx, machinePool, client.MergeFrom(machinePoolBefore))
   170  		if err != nil && resultErr == nil {
   171  			resultErr = err
   172  			result = ctrl.Result{}
   173  		}
   174  	}()
   175  
   176  	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta)
   177  	if err != nil {
   178  		return ctrl.Result{}, fmt.Errorf("AzureASOManagedMachinePool owner MachinePool is missing cluster label or cluster does not exist: %w", err)
   179  	}
   180  	if cluster == nil {
   181  		log.Info(fmt.Sprintf("Waiting for MachinePool controller to set %s label on MachinePool", clusterv1.ClusterNameLabel))
   182  		return ctrl.Result{}, nil
   183  	}
   184  	if cluster.Spec.ControlPlaneRef == nil ||
   185  		cluster.Spec.ControlPlaneRef.APIVersion != infrav1alpha.GroupVersion.Identifier() ||
   186  		cluster.Spec.ControlPlaneRef.Kind != infrav1alpha.AzureASOManagedControlPlaneKind {
   187  		return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("AzureASOManagedMachinePool cannot be used without AzureASOManagedControlPlane"))
   188  	}
   189  
   190  	if annotations.IsPaused(cluster, asoManagedMachinePool) {
   191  		return r.reconcilePaused(ctx, asoManagedMachinePool)
   192  	}
   193  
   194  	if !asoManagedMachinePool.DeletionTimestamp.IsZero() {
   195  		return r.reconcileDelete(ctx, asoManagedMachinePool, cluster)
   196  	}
   197  
   198  	return r.reconcileNormal(ctx, asoManagedMachinePool, machinePool, cluster)
   199  }
   200  
   201  func (r *AzureASOManagedMachinePoolReconciler) reconcileNormal(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool, machinePool *expv1.MachinePool, cluster *clusterv1.Cluster) (ctrl.Result, error) {
   202  	ctx, log, done := tele.StartSpanWithLogger(ctx,
   203  		"controllers.AzureASOManagedMachinePoolReconciler.reconcileNormal",
   204  	)
   205  	defer done()
   206  	log.V(4).Info("reconciling normally")
   207  
   208  	needsPatch := controllerutil.AddFinalizer(asoManagedMachinePool, clusterv1.ClusterFinalizer)
   209  	needsPatch = AddBlockMoveAnnotation(asoManagedMachinePool) || needsPatch
   210  	if needsPatch {
   211  		return ctrl.Result{Requeue: true}, nil
   212  	}
   213  
   214  	resources, err := mutators.ApplyMutators(ctx, asoManagedMachinePool.Spec.Resources, mutators.SetAgentPoolDefaults(r.Client, machinePool))
   215  	if err != nil {
   216  		return ctrl.Result{}, err
   217  	}
   218  
   219  	var agentPoolName string
   220  	for _, resource := range resources {
   221  		if resource.GroupVersionKind().Group == asocontainerservicev1.GroupVersion.Group &&
   222  			resource.GroupVersionKind().Kind == "ManagedClustersAgentPool" {
   223  			agentPoolName = resource.GetName()
   224  			break
   225  		}
   226  	}
   227  	if agentPoolName == "" {
   228  		return ctrl.Result{}, reconcile.TerminalError(mutators.ErrNoManagedClustersAgentPoolDefined)
   229  	}
   230  
   231  	resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources)
   232  	err = resourceReconciler.Reconcile(ctx)
   233  	if err != nil {
   234  		return ctrl.Result{}, fmt.Errorf("failed to reconcile resources: %w", err)
   235  	}
   236  	for _, status := range asoManagedMachinePool.Status.Resources {
   237  		if !status.Ready {
   238  			return ctrl.Result{}, nil
   239  		}
   240  	}
   241  
   242  	agentPool := &asocontainerservicev1.ManagedClustersAgentPool{}
   243  	err = r.Get(ctx, client.ObjectKey{Namespace: asoManagedMachinePool.Namespace, Name: agentPoolName}, agentPool)
   244  	if err != nil {
   245  		return ctrl.Result{}, fmt.Errorf("error getting ManagedClustersAgentPool: %w", err)
   246  	}
   247  
   248  	managedCluster := &asocontainerservicev1.ManagedCluster{}
   249  	err = r.Get(ctx, client.ObjectKey{Namespace: agentPool.Namespace, Name: agentPool.Owner().Name}, managedCluster)
   250  	if err != nil {
   251  		return ctrl.Result{}, fmt.Errorf("error getting ManagedCluster: %w", err)
   252  	}
   253  	if managedCluster.Status.NodeResourceGroup == nil {
   254  		return ctrl.Result{}, nil
   255  	}
   256  	rg := *managedCluster.Status.NodeResourceGroup
   257  
   258  	clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   259  	if err != nil {
   260  		return ctrl.Result{}, err
   261  	}
   262  	nodes := &corev1.NodeList{}
   263  	err = clusterClient.List(ctx, nodes,
   264  		client.MatchingLabels(expectedNodeLabels(agentPool.AzureName(), rg)),
   265  	)
   266  	if err != nil {
   267  		return ctrl.Result{}, fmt.Errorf("failed to list nodes in workload cluster: %w", err)
   268  	}
   269  	providerIDs := make([]string, 0, len(nodes.Items))
   270  	for _, node := range nodes.Items {
   271  		if node.Spec.ProviderID == "" {
   272  			// the node will receive a provider id soon
   273  			return ctrl.Result{Requeue: true}, nil
   274  		}
   275  		providerIDs = append(providerIDs, node.Spec.ProviderID)
   276  	}
   277  	// Prevent a different order from updating the spec.
   278  	slices.Sort(providerIDs)
   279  	asoManagedMachinePool.Spec.ProviderIDList = providerIDs
   280  	asoManagedMachinePool.Status.Replicas = int32(ptr.Deref(agentPool.Status.Count, 0))
   281  	if _, autoscaling := machinePool.Annotations[clusterv1.ReplicasManagedByAnnotation]; autoscaling {
   282  		machinePool.Spec.Replicas = &asoManagedMachinePool.Status.Replicas
   283  	}
   284  
   285  	asoManagedMachinePool.Status.Ready = true
   286  
   287  	return ctrl.Result{}, nil
   288  }
   289  
   290  func expectedNodeLabels(poolName, nodeRG string) map[string]string {
   291  	if len(poolName) > validation.LabelValueMaxLength {
   292  		poolName = poolName[:validation.LabelValueMaxLength]
   293  	}
   294  	if len(nodeRG) > validation.LabelValueMaxLength {
   295  		nodeRG = nodeRG[:validation.LabelValueMaxLength]
   296  	}
   297  	return map[string]string{
   298  		"kubernetes.azure.com/agentpool": poolName,
   299  		"kubernetes.azure.com/cluster":   nodeRG,
   300  	}
   301  }
   302  
   303  //nolint:unparam // an empty ctrl.Result is always returned here, leaving it as-is to avoid churn in refactoring later if that changes.
   304  func (r *AzureASOManagedMachinePoolReconciler) reconcilePaused(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool) (ctrl.Result, error) {
   305  	ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureASOManagedMachinePoolReconciler.reconcilePaused")
   306  	defer done()
   307  	log.V(4).Info("reconciling pause")
   308  
   309  	resources, err := mutators.ToUnstructured(ctx, asoManagedMachinePool.Spec.Resources)
   310  	if err != nil {
   311  		return ctrl.Result{}, err
   312  	}
   313  	resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources)
   314  	err = resourceReconciler.Pause(ctx)
   315  	if err != nil {
   316  		return ctrl.Result{}, fmt.Errorf("failed to pause resources: %w", err)
   317  	}
   318  
   319  	RemoveBlockMoveAnnotation(asoManagedMachinePool)
   320  
   321  	return ctrl.Result{}, nil
   322  }
   323  
   324  //nolint:unparam // an empty ctrl.Result is always returned here, leaving it as-is to avoid churn in refactoring later if that changes.
   325  func (r *AzureASOManagedMachinePoolReconciler) reconcileDelete(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool, cluster *clusterv1.Cluster) (ctrl.Result, error) {
   326  	ctx, log, done := tele.StartSpanWithLogger(ctx,
   327  		"controllers.AzureASOManagedMachinePoolReconciler.reconcileDelete",
   328  	)
   329  	defer done()
   330  	log.V(4).Info("reconciling delete")
   331  
   332  	// If the entire cluster is being deleted, this ASO ManagedClustersAgentPool will be deleted with the rest
   333  	// of the ManagedCluster.
   334  	if cluster.DeletionTimestamp.IsZero() {
   335  		resources, err := mutators.ToUnstructured(ctx, asoManagedMachinePool.Spec.Resources)
   336  		if err != nil {
   337  			return ctrl.Result{}, err
   338  		}
   339  		resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources)
   340  		err = resourceReconciler.Delete(ctx)
   341  		if err != nil {
   342  			return ctrl.Result{}, fmt.Errorf("failed to reconcile resources: %w", err)
   343  		}
   344  		if len(asoManagedMachinePool.Status.Resources) > 0 {
   345  			return ctrl.Result{}, nil
   346  		}
   347  	}
   348  
   349  	controllerutil.RemoveFinalizer(asoManagedMachinePool, clusterv1.ClusterFinalizer)
   350  	return ctrl.Result{}, nil
   351  }