sigs.k8s.io/cluster-api@v1.6.3/exp/internal/controllers/machinepool_controller.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/controllers/remote"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status;machinepools/finalizers,verbs=get;list;watch;create;update;patch;delete

var (
	// machinePoolKind contains the schema.GroupVersionKind for the MachinePool type.
	machinePoolKind = clusterv1.GroupVersion.WithKind("MachinePool")
)

const (
	// MachinePoolControllerName defines the controller used when creating clients.
	MachinePoolControllerName = "machinepool-controller"
)

// MachinePoolReconciler reconciles a MachinePool object.
type MachinePoolReconciler struct {
	Client    client.Client
	APIReader client.Reader
	Tracker   *remote.ClusterCacheTracker

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

	controller      controller.Controller
	ssaCache        ssa.Cache
	recorder        record.EventRecorder
	externalTracker external.ObjectTracker
}

// SetupWithManager sets up the controller with the Manager, watching MachinePools
// and mapping Cluster events to the MachinePools that belong to the Cluster.
func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	clusterToMachinePools, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &expv1.MachinePoolList{}, mgr.GetScheme())
	if err != nil {
		return err
	}

	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&expv1.MachinePool{}).
		WithOptions(options).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Watches(
			&clusterv1.Cluster{},
			handler.EnqueueRequestsFromMapFunc(clusterToMachinePools),
			// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
			builder.WithPredicates(
				predicates.All(ctrl.LoggerFrom(ctx),
					predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
				),
			),
		).
		Build(r)
	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.controller = c
	r.recorder = mgr.GetEventRecorderFor("machinepool-controller")
	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	r.ssaCache = ssa.NewCache()

	return nil
}

// Reconcile fetches the MachinePool and its owning Cluster, manages the finalizer,
// and runs either the deletion or the normal reconciliation flow.
func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	log := ctrl.LoggerFrom(ctx)

	mp := &expv1.MachinePool{}
	if err := r.Client.Get(ctx, req.NamespacedName, mp); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return ctrl.Result{}, nil
		}
		log.Error(err, "Error reading the object - requeue the request.")
		return ctrl.Result{}, err
	}

	log = log.WithValues("Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
	ctx = ctrl.LoggerInto(ctx, log)

	cluster, err := util.GetClusterByName(ctx, r.Client, mp.ObjectMeta.Namespace, mp.Spec.ClusterName)
	if err != nil {
		log.Error(err, "Failed to get Cluster for MachinePool.", "MachinePool", klog.KObj(mp), "Cluster", klog.KRef(mp.ObjectMeta.Namespace, mp.Spec.ClusterName))
		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machinepool %q in namespace %q",
			mp.Spec.ClusterName, mp.Name, mp.Namespace)
	}

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, mp) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	// Initialize the patch helper.
	patchHelper, err := patch.NewHelper(mp, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	defer func() {
		r.reconcilePhase(mp)
		// TODO(jpang): add support for metrics.

		// Always update the readyCondition with the summary of the machinepool conditions.
		conditions.SetSummary(mp,
			conditions.WithConditions(
				clusterv1.BootstrapReadyCondition,
				clusterv1.InfrastructureReadyCondition,
				expv1.ReplicasReadyCondition,
			),
		)

		// Always attempt to patch the object and status after each reconciliation.
		// Patch ObservedGeneration only if the reconciliation completed successfully.
		patchOpts := []patch.Option{
			patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
				clusterv1.ReadyCondition,
				clusterv1.BootstrapReadyCondition,
				clusterv1.InfrastructureReadyCondition,
				expv1.ReplicasReadyCondition,
			}},
		}
		if reterr == nil {
			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
		}
		if err := patchHelper.Patch(ctx, mp, patchOpts...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Reconcile labels.
	if mp.Labels == nil {
		mp.Labels = make(map[string]string)
	}
	mp.Labels[clusterv1.ClusterNameLabel] = mp.Spec.ClusterName

	// Handle deletion reconciliation loop.
	if !mp.ObjectMeta.DeletionTimestamp.IsZero() {
		err := r.reconcileDelete(ctx, cluster, mp)
		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
		// the current cluster because of concurrent access.
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{Requeue: true}, nil
		}
		return ctrl.Result{}, err
	}

	// Add finalizer first if not set to avoid the race condition between init and delete.
	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
	if !controllerutil.ContainsFinalizer(mp, expv1.MachinePoolFinalizer) {
		controllerutil.AddFinalizer(mp, expv1.MachinePoolFinalizer)
		return ctrl.Result{}, nil
	}

	// Handle normal reconciliation loop.
	res, err := r.reconcile(ctx, cluster, mp)
	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
	// the current cluster because of concurrent access.
	if errors.Is(err, remote.ErrClusterLocked) {
		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
		return ctrl.Result{Requeue: true}, nil
	}
	return res, err
}

// reconcile runs the bootstrap, infrastructure, and node reference phases and aggregates their results and errors.
func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) (ctrl.Result, error) {
	// Ensure the MachinePool is owned by the Cluster it belongs to.
	mp.SetOwnerReferences(util.EnsureOwnerRef(mp.GetOwnerReferences(), metav1.OwnerReference{
		APIVersion: clusterv1.GroupVersion.String(),
		Kind:       cluster.Kind,
		Name:       cluster.Name,
		UID:        cluster.UID,
	}))

	phases := []func(context.Context, *clusterv1.Cluster, *expv1.MachinePool) (ctrl.Result, error){
		r.reconcileBootstrap,
		r.reconcileInfrastructure,
		r.reconcileNodeRefs,
	}

	res := ctrl.Result{}
	errs := []error{}
	for _, phase := range phases {
		// Call the inner reconciliation methods.
		phaseResult, err := phase(ctx, cluster, mp)
		if err != nil {
			errs = append(errs, err)
		}
		if len(errs) > 0 {
			continue
		}

		res = util.LowestNonZeroResult(res, phaseResult)
	}
	return res, kerrors.NewAggregate(errs)
}

// reconcileDelete cleans up the external references and workload cluster Nodes for a
// MachinePool being deleted, removing the finalizer once cleanup has completed.
func (r *MachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) error {
	if ok, err := r.reconcileDeleteExternal(ctx, mp); !ok || err != nil {
		// Return early and don't remove the finalizer if we got an error or
		// the external reconciliation deletion isn't ready.
		return err
	}

	if err := r.reconcileDeleteNodes(ctx, cluster, mp); err != nil {
		// Return early and don't remove the finalizer if we got an error.
		return err
	}

	controllerutil.RemoveFinalizer(mp, expv1.MachinePoolFinalizer)
	return nil
}

// reconcileDeleteNodes deletes the MachinePool's retired Nodes in the workload cluster.
func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluster *clusterv1.Cluster, machinepool *expv1.MachinePool) error {
	if len(machinepool.Status.NodeRefs) == 0 {
		return nil
	}

	clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		return err
	}

	return r.deleteRetiredNodes(ctx, clusterClient, machinepool.Status.NodeRefs, machinepool.Spec.ProviderIDList)
}

// reconcileDeleteExternal tries to delete external references, returning true if it cannot find any.
func (r *MachinePoolReconciler) reconcileDeleteExternal(ctx context.Context, m *expv1.MachinePool) (bool, error) {
	objects := []*unstructured.Unstructured{}
	references := []*corev1.ObjectReference{
		m.Spec.Template.Spec.Bootstrap.ConfigRef,
		&m.Spec.Template.Spec.InfrastructureRef,
	}

	// Loop over the references and try to retrieve each one with the client.
	for _, ref := range references {
		if ref == nil {
			continue
		}

		obj, err := external.Get(ctx, r.Client, ref, m.Namespace)
		if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
			return false, errors.Wrapf(err, "failed to get %s %q for MachinePool %q in namespace %q",
				ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
		}
		if obj != nil {
			objects = append(objects, obj)
		}
	}

	// Issue a delete request for any object that has been found.
	for _, obj := range objects {
		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
			return false, errors.Wrapf(err,
				"failed to delete %v %q for MachinePool %q in namespace %q",
				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
		}
	}

	// Return true if there are no more external objects.
	return len(objects) == 0, nil
}

// watchClusterNodes sets up a watch on the workload cluster's Nodes, once the control plane
// has been initialized, so that Node events enqueue the owning MachinePool.
func (r *MachinePoolReconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
	log := ctrl.LoggerFrom(ctx)

	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
		return nil
	}

	// If there is no tracker, don't watch remote nodes.
	if r.Tracker == nil {
		return nil
	}

	return r.Tracker.Watch(ctx, remote.WatchInput{
		Name:         "machinepool-watchNodes",
		Cluster:      util.ObjectKey(cluster),
		Watcher:      r.controller,
		Kind:         &corev1.Node{},
		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachinePool),
	})
}

// nodeToMachinePool maps a Node event to the MachinePool that owns the Node,
// matching by node name first and by provider ID as a fallback.
func (r *MachinePoolReconciler) nodeToMachinePool(ctx context.Context, o client.Object) []reconcile.Request {
	node, ok := o.(*corev1.Node)
	if !ok {
		panic(fmt.Sprintf("Expected a Node but got a %T", o))
	}

	var filters []client.ListOption
	// Match by clusterName when the node has the annotation.
	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
		filters = append(filters, client.MatchingLabels{
			clusterv1.ClusterNameLabel: clusterName,
		})
	}

	// Match by namespace when the node has the annotation.
	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
		filters = append(filters, client.InNamespace(namespace))
	}

	// Match by nodeName and status.nodeRef.name.
	machinePoolList := &expv1.MachinePoolList{}
	if err := r.Client.List(
		ctx,
		machinePoolList,
		append(filters, client.MatchingFields{index.MachinePoolNodeNameField: node.Name})...); err != nil {
		return nil
	}

	// There should be exactly 1 MachinePool for the node.
	if len(machinePoolList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
	}

	// Otherwise match by providerID. This is useful when e.g. the NodeRef has not been set yet.
	if node.Spec.ProviderID == "" {
		return nil
	}
	machinePoolList = &expv1.MachinePoolList{}
	if err := r.Client.List(
		ctx,
		machinePoolList,
		append(filters, client.MatchingFields{index.MachinePoolProviderIDField: node.Spec.ProviderID})...); err != nil {
		return nil
	}

	// There should be exactly 1 MachinePool for the node.
	if len(machinePoolList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machinePoolList.Items[0])}}
	}

	return nil
}
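
// A minimal sketch of how this reconciler is typically wired into a controller-runtime
// Manager. The surrounding setup code is assumed: ctx, mgr, tracker (a
// *remote.ClusterCacheTracker for the management cluster), and watchFilterValue are
// illustrative names, not values defined in this package; the concurrency value is an
// arbitrary example.
//
//	r := &MachinePoolReconciler{
//		Client:           mgr.GetClient(),
//		APIReader:        mgr.GetAPIReader(),
//		Tracker:          tracker,
//		WatchFilterValue: watchFilterValue,
//	}
//	if err := r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil {
//		return errors.Wrap(err, "unable to set up MachinePool controller")
//	}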