sigs.k8s.io/cluster-api@v1.7.1/exp/internal/controllers/machinepool_controller.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
    "context"
    "fmt"
    "time"

    "github.com/pkg/errors"
    corev1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    kerrors "k8s.io/apimachinery/pkg/util/errors"
    "k8s.io/client-go/tools/record"
    "k8s.io/klog/v2"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/builder"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/controller"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    "sigs.k8s.io/controller-runtime/pkg/handler"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"

    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    "sigs.k8s.io/cluster-api/api/v1beta1/index"
    "sigs.k8s.io/cluster-api/controllers/external"
    "sigs.k8s.io/cluster-api/controllers/remote"
    expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    "sigs.k8s.io/cluster-api/internal/util/ssa"
    "sigs.k8s.io/cluster-api/util"
    "sigs.k8s.io/cluster-api/util/annotations"
    "sigs.k8s.io/cluster-api/util/conditions"
    "sigs.k8s.io/cluster-api/util/patch"
    "sigs.k8s.io/cluster-api/util/predicates"
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status;machinepools/finalizers,verbs=get;list;watch;create;update;patch;delete

var (
    // machinePoolKind contains the schema.GroupVersionKind for the MachinePool type.
    machinePoolKind = clusterv1.GroupVersion.WithKind("MachinePool")
)

const (
    // MachinePoolControllerName defines the controller used when creating clients.
    MachinePoolControllerName = "machinepool-controller"
)

// MachinePoolReconciler reconciles a MachinePool object.
type MachinePoolReconciler struct {
    Client    client.Client
    APIReader client.Reader
    Tracker   *remote.ClusterCacheTracker

    // WatchFilterValue is the label value used to filter events prior to reconciliation.
    WatchFilterValue string

    controller      controller.Controller
    ssaCache        ssa.Cache
    recorder        record.EventRecorder
    externalTracker external.ObjectTracker
}

func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    clusterToMachinePools, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &expv1.MachinePoolList{}, mgr.GetScheme())
    if err != nil {
        return err
    }

    c, err := ctrl.NewControllerManagedBy(mgr).
        For(&expv1.MachinePool{}).
        WithOptions(options).
        WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
        Watches(
            &clusterv1.Cluster{},
            handler.EnqueueRequestsFromMapFunc(clusterToMachinePools),
            // TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
            builder.WithPredicates(
                predicates.All(ctrl.LoggerFrom(ctx),
                    predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
                    predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
                ),
            ),
        ).
        Build(r)
    if err != nil {
        return errors.Wrap(err, "failed setting up with a controller manager")
    }

    r.controller = c
    r.recorder = mgr.GetEventRecorderFor("machinepool-controller")
    r.externalTracker = external.ObjectTracker{
        Controller: c,
        Cache:      mgr.GetCache(),
    }
    r.ssaCache = ssa.NewCache()

    return nil
}

func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
    log := ctrl.LoggerFrom(ctx)

    mp := &expv1.MachinePool{}
    if err := r.Client.Get(ctx, req.NamespacedName, mp); err != nil {
        if apierrors.IsNotFound(err) {
            // Object not found, return. Created objects are automatically garbage collected.
            // For additional cleanup logic use finalizers.
            return ctrl.Result{}, nil
        }
        log.Error(err, "Error reading the object - requeue the request.")
        return ctrl.Result{}, err
    }

    log = log.WithValues("Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
    ctx = ctrl.LoggerInto(ctx, log)

    cluster, err := util.GetClusterByName(ctx, r.Client, mp.ObjectMeta.Namespace, mp.Spec.ClusterName)
    if err != nil {
        log.Error(err, "Failed to get Cluster for MachinePool.", "MachinePool", klog.KObj(mp), "Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
        return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machinepool %q in namespace %q",
            mp.Spec.ClusterName, mp.Name, mp.Namespace)
    }

    // Return early if the object or Cluster is paused.
    if annotations.IsPaused(cluster, mp) {
        log.Info("Reconciliation is paused for this object")
        return ctrl.Result{}, nil
    }

    // Initialize the patch helper.
    patchHelper, err := patch.NewHelper(mp, r.Client)
    if err != nil {
        return ctrl.Result{}, err
    }

    defer func() {
        r.reconcilePhase(mp)
        // TODO(jpang): add support for metrics.

        // Always update the readyCondition with the summary of the machinepool conditions.
        conditions.SetSummary(mp,
            conditions.WithConditions(
                clusterv1.BootstrapReadyCondition,
                clusterv1.InfrastructureReadyCondition,
                expv1.ReplicasReadyCondition,
            ),
        )

        // Always attempt to patch the object and status after each reconciliation.
        // Patch ObservedGeneration only if the reconciliation completed successfully.
        patchOpts := []patch.Option{
            patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
                clusterv1.ReadyCondition,
                clusterv1.BootstrapReadyCondition,
                clusterv1.InfrastructureReadyCondition,
                expv1.ReplicasReadyCondition,
            }},
        }
        if reterr == nil {
            patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
        }
        if err := patchHelper.Patch(ctx, mp, patchOpts...); err != nil {
            reterr = kerrors.NewAggregate([]error{reterr, err})
        }
    }()

    // Reconcile labels.
    if mp.Labels == nil {
        mp.Labels = make(map[string]string)
    }
    mp.Labels[clusterv1.ClusterNameLabel] = mp.Spec.ClusterName

    // Handle deletion reconciliation loop.
    if !mp.ObjectMeta.DeletionTimestamp.IsZero() {
        err := r.reconcileDelete(ctx, cluster, mp)
        // Requeue if the reconcile failed because the ClusterCacheTracker was locked for
        // the current cluster because of concurrent access.
        if errors.Is(err, remote.ErrClusterLocked) {
            log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
            return ctrl.Result{RequeueAfter: time.Minute}, nil
        }
        return ctrl.Result{}, err
    }

    // Add finalizer first if not set to avoid the race condition between init and delete.
    // Note: Finalizers in general can only be added when the deletionTimestamp is not set.
    if !controllerutil.ContainsFinalizer(mp, expv1.MachinePoolFinalizer) {
        controllerutil.AddFinalizer(mp, expv1.MachinePoolFinalizer)
        return ctrl.Result{}, nil
    }

    // Handle normal reconciliation loop.
    res, err := r.reconcile(ctx, cluster, mp)
    // Requeue if the reconcile failed because the ClusterCacheTracker was locked for
    // the current cluster because of concurrent access.
    if errors.Is(err, remote.ErrClusterLocked) {
        log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
        return ctrl.Result{RequeueAfter: time.Minute}, nil
    }
    return res, err
}

func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) (ctrl.Result, error) {
    // Ensure the MachinePool is owned by the Cluster it belongs to.
    mp.SetOwnerReferences(util.EnsureOwnerRef(mp.GetOwnerReferences(), metav1.OwnerReference{
        APIVersion: clusterv1.GroupVersion.String(),
        Kind:       "Cluster",
        Name:       cluster.Name,
        UID:        cluster.UID,
    }))

    phases := []func(context.Context, *clusterv1.Cluster, *expv1.MachinePool) (ctrl.Result, error){
        r.reconcileBootstrap,
        r.reconcileInfrastructure,
        r.reconcileNodeRefs,
    }

    res := ctrl.Result{}
    errs := []error{}
    for _, phase := range phases {
        // Call the inner reconciliation methods.
        phaseResult, err := phase(ctx, cluster, mp)
        if err != nil {
            errs = append(errs, err)
        }
        if len(errs) > 0 {
            continue
        }

        res = util.LowestNonZeroResult(res, phaseResult)
    }
    return res, kerrors.NewAggregate(errs)
}

func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) error {
    if ok, err := r.reconcileDeleteExternal(ctx, mp); !ok || err != nil {
        // Return early and don't remove the finalizer if we got an error or
        // the external reconciliation deletion isn't ready.
        return err
    }

    if err := r.reconcileDeleteNodes(ctx, cluster, mp); err != nil {
        // Return early and don't remove the finalizer if we got an error.
        return err
    }

    controllerutil.RemoveFinalizer(mp, expv1.MachinePoolFinalizer)
    return nil
}

func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinepool *expv1.MachinePool) error {
    if len(machinepool.Status.NodeRefs) == 0 {
        return nil
    }

    clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
    if err != nil {
        return err
    }

    return r.deleteRetiredNodes(ctx, clusterClient, machinepool.Status.NodeRefs, machinepool.Spec.ProviderIDList)
}

// reconcileDeleteExternal tries to delete external references, returning true if it cannot find any.
func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *expv1.MachinePool) (bool, error) {
    objects := []*unstructured.Unstructured{}
    references := []*corev1.ObjectReference{
        m.Spec.Template.Spec.Bootstrap.ConfigRef,
        &m.Spec.Template.Spec.InfrastructureRef,
    }

    // Loop over the references and try to retrieve it with the client.
    for _, ref := range references {
        if ref == nil {
            continue
        }

        obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
        if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
            return false, errors.Wrapf(err, "failed to get %s %q for MachinePool %q in namespace %q",
                ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
        }
        if obj != nil {
            objects = append(objects, obj)
        }
    }

    // Issue a delete request for any object that has been found.
    for _, obj := range objects {
        if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
            return false, errors.Wrapf(err,
                "failed to delete %v %q for MachinePool %q in namespace %q",
                obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
        }
    }

    // Return true if there are no more external objects.
    return len(objects) == 0, nil
}

func (r *MachinePoolReconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
    log := ctrl.LoggerFrom(ctx)

    if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
        log.V(5).Info("Skipping node watching setup because control plane is not initialized")
        return nil
    }

    // If there is no tracker, don't watch remote nodes.
    if r.Tracker == nil {
        return nil
    }

    return r.Tracker.Watch(ctx, remote.WatchInput{
        Name:         "machinepool-watchNodes",
        Cluster:      util.ObjectKey(cluster),
        Watcher:      r.controller,
        Kind:         &corev1.Node{},
        EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachinePool),
    })
}

func (r *MachinePoolReconciler) nodeToMachinePool(ctx context.Context, o client.Object) []reconcile.Request {
    node, ok := o.(*corev1.Node)
    if !ok {
        panic(fmt.Sprintf("Expected a Node but got a %T", o))
    }

    var filters []client.ListOption
    // Match by clusterName when the node has the annotation.
    if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
        filters = append(filters, client.MatchingLabels{
            clusterv1.ClusterNameLabel: clusterName,
        })
    }

    // Match by namespace when the node has the annotation.
    if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
        filters = append(filters, client.InNamespace(namespace))
    }

    // Match by nodeName and status.nodeRef.name.
    machinePoolList := &expv1.MachinePoolList{}
    if err := r.Client.List(
        ctx,
        machinePoolList,
        append(filters, client.MatchingFields{index.MachinePoolNodeNameField: node.Name})...); err != nil {
        return nil
    }

    // There should be exactly 1 MachinePool for the node.
    if len(machinePoolList.Items) == 1 {
        return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
    }

    // Otherwise let's match by providerID. This is useful when, e.g., the NodeRef has not been set yet.
    // Match by providerID.
    if node.Spec.ProviderID == "" {
        return nil
    }
    machinePoolList = &expv1.MachinePoolList{}
    if err := r.Client.List(
        ctx,
        machinePoolList,
        append(filters, client.MatchingFields{index.MachinePoolProviderIDField: node.Spec.ProviderID})...); err != nil {
        return nil
    }

    // There should be exactly 1 MachinePool for the node.
    if len(machinePoolList.Items) == 1 {
        return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
    }

    return nil
}
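
// setupMachinePoolReconcilerExample is an illustrative sketch and not part of the
// upstream v1.7.1 file: it shows one plausible way a manager's main package could wire
// up this reconciler. The function name, the tracker argument, and the concurrency
// value are assumptions for illustration only. The field indexes used by
// nodeToMachinePool (node name and provider ID) must be registered before the
// controller starts listing MachinePools with client.MatchingFields.
func setupMachinePoolReconcilerExample(ctx context.Context, mgr ctrl.Manager, tracker *remote.ClusterCacheTracker) error {
    // Register the default Cluster API field indexes on the manager's cache.
    if err := index.AddDefaultIndexes(ctx, mgr); err != nil {
        return err
    }

    r := &MachinePoolReconciler{
        Client:    mgr.GetClient(),
        APIReader: mgr.GetAPIReader(),
        Tracker:   tracker,
    }
    // MaxConcurrentReconciles is an arbitrary example value.
    return r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10})
}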