sigs.k8s.io/cluster-api-provider-azure@v1.17.0/controllers/azureasomanagedmachinepool_controller.go (about) 1 /* 2 Copyright 2024 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controllers 18 19 import ( 20 "context" 21 "fmt" 22 "slices" 23 24 asocontainerservicev1 "github.com/Azure/azure-service-operator/v2/api/containerservice/v1api20231001" 25 corev1 "k8s.io/api/core/v1" 26 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 27 "k8s.io/apimachinery/pkg/types" 28 "k8s.io/apimachinery/pkg/util/validation" 29 "k8s.io/utils/ptr" 30 infrav1alpha "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha1" 31 "sigs.k8s.io/cluster-api-provider-azure/pkg/mutators" 32 "sigs.k8s.io/cluster-api-provider-azure/util/tele" 33 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 34 "sigs.k8s.io/cluster-api/controllers/external" 35 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 36 utilexp "sigs.k8s.io/cluster-api/exp/util" 37 "sigs.k8s.io/cluster-api/util" 38 "sigs.k8s.io/cluster-api/util/annotations" 39 "sigs.k8s.io/cluster-api/util/patch" 40 "sigs.k8s.io/cluster-api/util/predicates" 41 ctrl "sigs.k8s.io/controller-runtime" 42 "sigs.k8s.io/controller-runtime/pkg/builder" 43 "sigs.k8s.io/controller-runtime/pkg/client" 44 "sigs.k8s.io/controller-runtime/pkg/controller" 45 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 46 "sigs.k8s.io/controller-runtime/pkg/handler" 47 "sigs.k8s.io/controller-runtime/pkg/reconcile" 48 ) 49 50 // AzureASOManagedMachinePoolReconciler reconciles a AzureASOManagedMachinePool object. 51 type AzureASOManagedMachinePoolReconciler struct { 52 client.Client 53 WatchFilterValue string 54 Tracker ClusterTracker 55 56 newResourceReconciler func(*infrav1alpha.AzureASOManagedMachinePool, []*unstructured.Unstructured) resourceReconciler 57 } 58 59 // ClusterTracker wraps a CAPI remote.ClusterCacheTracker. 60 type ClusterTracker interface { 61 GetClient(context.Context, types.NamespacedName) (client.Client, error) 62 } 63 64 // SetupWithManager sets up the controller with the Manager. 65 func (r *AzureASOManagedMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { 66 _, log, done := tele.StartSpanWithLogger(ctx, 67 "controllers.AzureASOManagedMachinePoolReconciler.SetupWithManager", 68 tele.KVP("controller", infrav1alpha.AzureASOManagedMachinePoolKind), 69 ) 70 defer done() 71 72 clusterToAzureASOManagedMachinePools, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &infrav1alpha.AzureASOManagedMachinePoolList{}, mgr.GetScheme()) 73 if err != nil { 74 return fmt.Errorf("failed to get Cluster to AzureASOManagedMachinePool mapper: %w", err) 75 } 76 77 c, err := ctrl.NewControllerManagedBy(mgr). 78 WithOptions(options). 79 For(&infrav1alpha.AzureASOManagedMachinePool{}). 80 WithEventFilter(predicates.ResourceHasFilterLabel(log, r.WatchFilterValue)). 81 Watches( 82 &clusterv1.Cluster{}, 83 handler.EnqueueRequestsFromMapFunc(clusterToAzureASOManagedMachinePools), 84 builder.WithPredicates( 85 predicates.ResourceHasFilterLabel(log, r.WatchFilterValue), 86 predicates.Any(log, 87 predicates.ClusterControlPlaneInitialized(log), 88 ClusterUpdatePauseChange(log), 89 ), 90 ), 91 ). 92 Watches( 93 &expv1.MachinePool{}, 94 handler.EnqueueRequestsFromMapFunc(utilexp.MachinePoolToInfrastructureMapFunc(ctx, 95 infrav1alpha.GroupVersion.WithKind(infrav1alpha.AzureASOManagedMachinePoolKind)), 96 ), 97 builder.WithPredicates( 98 predicates.ResourceHasFilterLabel(log, r.WatchFilterValue), 99 ), 100 ). 101 Build(r) 102 if err != nil { 103 return err 104 } 105 106 externalTracker := &external.ObjectTracker{ 107 Cache: mgr.GetCache(), 108 Controller: c, 109 } 110 111 r.newResourceReconciler = func(asoManagedCluster *infrav1alpha.AzureASOManagedMachinePool, resources []*unstructured.Unstructured) resourceReconciler { 112 return &ResourceReconciler{ 113 Client: r.Client, 114 resources: resources, 115 owner: asoManagedCluster, 116 watcher: externalTracker, 117 } 118 } 119 120 return nil 121 } 122 123 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools,verbs=get;list;watch;create;update;patch;delete 124 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools/status,verbs=get;update;patch 125 //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azureasomanagedmachinepools/finalizers,verbs=update 126 127 // Reconcile reconciles an AzureASOManagedMachinePool. 128 func (r *AzureASOManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, resultErr error) { 129 ctx, log, done := tele.StartSpanWithLogger(ctx, 130 "controllers.AzureASOManagedMachinePoolReconciler.Reconcile", 131 tele.KVP("namespace", req.Namespace), 132 tele.KVP("name", req.Name), 133 tele.KVP("kind", infrav1alpha.AzureASOManagedMachinePoolKind), 134 ) 135 defer done() 136 137 asoManagedMachinePool := &infrav1alpha.AzureASOManagedMachinePool{} 138 err := r.Get(ctx, req.NamespacedName, asoManagedMachinePool) 139 if err != nil { 140 return ctrl.Result{}, client.IgnoreNotFound(err) 141 } 142 143 patchHelper, err := patch.NewHelper(asoManagedMachinePool, r.Client) 144 if err != nil { 145 return ctrl.Result{}, fmt.Errorf("failed to create patch helper: %w", err) 146 } 147 defer func() { 148 err := patchHelper.Patch(ctx, asoManagedMachinePool) 149 if err != nil && resultErr == nil { 150 resultErr = err 151 result = ctrl.Result{} 152 } 153 }() 154 155 asoManagedMachinePool.Status.Ready = false 156 157 machinePool, err := utilexp.GetOwnerMachinePool(ctx, r.Client, asoManagedMachinePool.ObjectMeta) 158 if err != nil { 159 return ctrl.Result{}, err 160 } 161 if machinePool == nil { 162 log.V(4).Info("Waiting for MachinePool Controller to set OwnerRef on AzureASOManagedMachinePool") 163 return ctrl.Result{}, nil 164 } 165 166 machinePoolBefore := machinePool.DeepCopy() 167 defer func() { 168 // Skip using a patch helper here because we will never modify the MachinePool status. 169 err := r.Patch(ctx, machinePool, client.MergeFrom(machinePoolBefore)) 170 if err != nil && resultErr == nil { 171 resultErr = err 172 result = ctrl.Result{} 173 } 174 }() 175 176 cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta) 177 if err != nil { 178 return ctrl.Result{}, fmt.Errorf("AzureASOManagedMachinePool owner MachinePool is missing cluster label or cluster does not exist: %w", err) 179 } 180 if cluster == nil { 181 log.Info(fmt.Sprintf("Waiting for MachinePool controller to set %s label on MachinePool", clusterv1.ClusterNameLabel)) 182 return ctrl.Result{}, nil 183 } 184 if cluster.Spec.ControlPlaneRef == nil || 185 cluster.Spec.ControlPlaneRef.APIVersion != infrav1alpha.GroupVersion.Identifier() || 186 cluster.Spec.ControlPlaneRef.Kind != infrav1alpha.AzureASOManagedControlPlaneKind { 187 return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("AzureASOManagedMachinePool cannot be used without AzureASOManagedControlPlane")) 188 } 189 190 if annotations.IsPaused(cluster, asoManagedMachinePool) { 191 return r.reconcilePaused(ctx, asoManagedMachinePool) 192 } 193 194 if !asoManagedMachinePool.DeletionTimestamp.IsZero() { 195 return r.reconcileDelete(ctx, asoManagedMachinePool, cluster) 196 } 197 198 return r.reconcileNormal(ctx, asoManagedMachinePool, machinePool, cluster) 199 } 200 201 func (r *AzureASOManagedMachinePoolReconciler) reconcileNormal(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool, machinePool *expv1.MachinePool, cluster *clusterv1.Cluster) (ctrl.Result, error) { 202 ctx, log, done := tele.StartSpanWithLogger(ctx, 203 "controllers.AzureASOManagedMachinePoolReconciler.reconcileNormal", 204 ) 205 defer done() 206 log.V(4).Info("reconciling normally") 207 208 needsPatch := controllerutil.AddFinalizer(asoManagedMachinePool, clusterv1.ClusterFinalizer) 209 needsPatch = AddBlockMoveAnnotation(asoManagedMachinePool) || needsPatch 210 if needsPatch { 211 return ctrl.Result{Requeue: true}, nil 212 } 213 214 resources, err := mutators.ApplyMutators(ctx, asoManagedMachinePool.Spec.Resources, mutators.SetAgentPoolDefaults(r.Client, machinePool)) 215 if err != nil { 216 return ctrl.Result{}, err 217 } 218 219 var agentPoolName string 220 for _, resource := range resources { 221 if resource.GroupVersionKind().Group == asocontainerservicev1.GroupVersion.Group && 222 resource.GroupVersionKind().Kind == "ManagedClustersAgentPool" { 223 agentPoolName = resource.GetName() 224 break 225 } 226 } 227 if agentPoolName == "" { 228 return ctrl.Result{}, reconcile.TerminalError(mutators.ErrNoManagedClustersAgentPoolDefined) 229 } 230 231 resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources) 232 err = resourceReconciler.Reconcile(ctx) 233 if err != nil { 234 return ctrl.Result{}, fmt.Errorf("failed to reconcile resources: %w", err) 235 } 236 for _, status := range asoManagedMachinePool.Status.Resources { 237 if !status.Ready { 238 return ctrl.Result{}, nil 239 } 240 } 241 242 agentPool := &asocontainerservicev1.ManagedClustersAgentPool{} 243 err = r.Get(ctx, client.ObjectKey{Namespace: asoManagedMachinePool.Namespace, Name: agentPoolName}, agentPool) 244 if err != nil { 245 return ctrl.Result{}, fmt.Errorf("error getting ManagedClustersAgentPool: %w", err) 246 } 247 248 managedCluster := &asocontainerservicev1.ManagedCluster{} 249 err = r.Get(ctx, client.ObjectKey{Namespace: agentPool.Namespace, Name: agentPool.Owner().Name}, managedCluster) 250 if err != nil { 251 return ctrl.Result{}, fmt.Errorf("error getting ManagedCluster: %w", err) 252 } 253 if managedCluster.Status.NodeResourceGroup == nil { 254 return ctrl.Result{}, nil 255 } 256 rg := *managedCluster.Status.NodeResourceGroup 257 258 clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) 259 if err != nil { 260 return ctrl.Result{}, err 261 } 262 nodes := &corev1.NodeList{} 263 err = clusterClient.List(ctx, nodes, 264 client.MatchingLabels(expectedNodeLabels(agentPool.AzureName(), rg)), 265 ) 266 if err != nil { 267 return ctrl.Result{}, fmt.Errorf("failed to list nodes in workload cluster: %w", err) 268 } 269 providerIDs := make([]string, 0, len(nodes.Items)) 270 for _, node := range nodes.Items { 271 if node.Spec.ProviderID == "" { 272 // the node will receive a provider id soon 273 return ctrl.Result{Requeue: true}, nil 274 } 275 providerIDs = append(providerIDs, node.Spec.ProviderID) 276 } 277 // Prevent a different order from updating the spec. 278 slices.Sort(providerIDs) 279 asoManagedMachinePool.Spec.ProviderIDList = providerIDs 280 asoManagedMachinePool.Status.Replicas = int32(ptr.Deref(agentPool.Status.Count, 0)) 281 if _, autoscaling := machinePool.Annotations[clusterv1.ReplicasManagedByAnnotation]; autoscaling { 282 machinePool.Spec.Replicas = &asoManagedMachinePool.Status.Replicas 283 } 284 285 asoManagedMachinePool.Status.Ready = true 286 287 return ctrl.Result{}, nil 288 } 289 290 func expectedNodeLabels(poolName, nodeRG string) map[string]string { 291 if len(poolName) > validation.LabelValueMaxLength { 292 poolName = poolName[:validation.LabelValueMaxLength] 293 } 294 if len(nodeRG) > validation.LabelValueMaxLength { 295 nodeRG = nodeRG[:validation.LabelValueMaxLength] 296 } 297 return map[string]string{ 298 "kubernetes.azure.com/agentpool": poolName, 299 "kubernetes.azure.com/cluster": nodeRG, 300 } 301 } 302 303 //nolint:unparam // an empty ctrl.Result is always returned here, leaving it as-is to avoid churn in refactoring later if that changes. 304 func (r *AzureASOManagedMachinePoolReconciler) reconcilePaused(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool) (ctrl.Result, error) { 305 ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureASOManagedMachinePoolReconciler.reconcilePaused") 306 defer done() 307 log.V(4).Info("reconciling pause") 308 309 resources, err := mutators.ToUnstructured(ctx, asoManagedMachinePool.Spec.Resources) 310 if err != nil { 311 return ctrl.Result{}, err 312 } 313 resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources) 314 err = resourceReconciler.Pause(ctx) 315 if err != nil { 316 return ctrl.Result{}, fmt.Errorf("failed to pause resources: %w", err) 317 } 318 319 RemoveBlockMoveAnnotation(asoManagedMachinePool) 320 321 return ctrl.Result{}, nil 322 } 323 324 //nolint:unparam // an empty ctrl.Result is always returned here, leaving it as-is to avoid churn in refactoring later if that changes. 325 func (r *AzureASOManagedMachinePoolReconciler) reconcileDelete(ctx context.Context, asoManagedMachinePool *infrav1alpha.AzureASOManagedMachinePool, cluster *clusterv1.Cluster) (ctrl.Result, error) { 326 ctx, log, done := tele.StartSpanWithLogger(ctx, 327 "controllers.AzureASOManagedMachinePoolReconciler.reconcileDelete", 328 ) 329 defer done() 330 log.V(4).Info("reconciling delete") 331 332 // If the entire cluster is being deleted, this ASO ManagedClustersAgentPool will be deleted with the rest 333 // of the ManagedCluster. 334 if cluster.DeletionTimestamp.IsZero() { 335 resources, err := mutators.ToUnstructured(ctx, asoManagedMachinePool.Spec.Resources) 336 if err != nil { 337 return ctrl.Result{}, err 338 } 339 resourceReconciler := r.newResourceReconciler(asoManagedMachinePool, resources) 340 err = resourceReconciler.Delete(ctx) 341 if err != nil { 342 return ctrl.Result{}, fmt.Errorf("failed to reconcile resources: %w", err) 343 } 344 if len(asoManagedMachinePool.Status.Resources) > 0 { 345 return ctrl.Result{}, nil 346 } 347 } 348 349 controllerutil.RemoveFinalizer(asoManagedMachinePool, clusterv1.ClusterFinalizer) 350 return ctrl.Result{}, nil 351 }