sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/cluster/cluster_controller.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cluster

import (
	"context"
	"fmt"
	"path"
	"strings"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	"k8s.io/apimachinery/pkg/runtime"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/record"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/controllers/external"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/feature"
	"sigs.k8s.io/cluster-api/internal/hooks"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

const (
	// deleteRequeueAfter is how long to wait before checking again to see if the cluster still has children during
	// deletion.
	deleteRequeueAfter = 5 * time.Second
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io;controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status;clusters/finalizers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch

// Reconciler reconciles a Cluster object.
type Reconciler struct {
	Client                    client.Client
	UnstructuredCachingClient client.Client
	APIReader                 client.Reader

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

	recorder        record.EventRecorder
	externalTracker external.ObjectTracker
}

func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&clusterv1.Cluster{}).
		Watches(
			&clusterv1.Machine{},
			handler.EnqueueRequestsFromMapFunc(r.controlPlaneMachineToCluster),
		).
		WithOptions(options).
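		// Note: the predicate below drops events for paused objects and, when WatchFilterValue is set,
		// for objects that do not carry a matching cluster.x-k8s.io/watch-filter label (see the
		// predicates package for the exact semantics).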
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Build(r)

	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.recorder = mgr.GetEventRecorderFor("cluster-controller")
	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	return nil
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	log := ctrl.LoggerFrom(ctx)

	// Fetch the Cluster instance.
	cluster := &clusterv1.Cluster{}
	if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return ctrl.Result{}, nil
		}

		// Error reading the object - requeue the request.
		return ctrl.Result{}, err
	}

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, cluster) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	// Initialize the patch helper.
	patchHelper, err := patch.NewHelper(cluster, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	defer func() {
		// Always reconcile the Status.Phase field.
		r.reconcilePhase(ctx, cluster)

		// Always attempt to Patch the Cluster object and status after each reconciliation.
		// Patch ObservedGeneration only if the reconciliation completed successfully.
		patchOpts := []patch.Option{}
		if reterr == nil {
			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
		}
		if err := patchCluster(ctx, patchHelper, cluster, patchOpts...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Handle deletion reconciliation loop.
	if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
		return r.reconcileDelete(ctx, cluster)
	}

	// Add finalizer first if not set to avoid the race condition between init and delete.
	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
	if !controllerutil.ContainsFinalizer(cluster, clusterv1.ClusterFinalizer) {
		controllerutil.AddFinalizer(cluster, clusterv1.ClusterFinalizer)
		return ctrl.Result{}, nil
	}

	// Handle normal reconciliation loop.
	return r.reconcile(ctx, cluster)
}

func patchCluster(ctx context.Context, patchHelper *patch.Helper, cluster *clusterv1.Cluster, options ...patch.Option) error {
	// Always update the readyCondition by summarizing the state of other conditions.
	conditions.SetSummary(cluster,
		conditions.WithConditions(
			clusterv1.ControlPlaneReadyCondition,
			clusterv1.InfrastructureReadyCondition,
		),
	)

	// Patch the object, ignoring conflicts on the conditions owned by this controller.
	// Also, if requested, add additional options, e.g. patching ObservedGeneration, when issuing the
	// patch at the end of the reconcile loop.
	options = append(options,
		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
			clusterv1.ReadyCondition,
			clusterv1.ControlPlaneReadyCondition,
			clusterv1.InfrastructureReadyCondition,
		}},
	)
	return patchHelper.Patch(ctx, cluster, options...)
}

// reconcile handles cluster reconciliation.
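// It runs the phase reconcilers (infrastructure, control plane, kubeconfig and control plane
// initialized) in order, aggregating their errors and keeping the lowest non-zero requeue result.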
func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
	log := ctrl.LoggerFrom(ctx)

	if cluster.Spec.Topology != nil {
		if cluster.Spec.ControlPlaneRef == nil || cluster.Spec.InfrastructureRef == nil {
			// TODO: add a condition to surface this scenario
			log.Info("Waiting for the topology to be generated")
			return ctrl.Result{}, nil
		}
	}

	phases := []func(context.Context, *clusterv1.Cluster) (ctrl.Result, error){
		r.reconcileInfrastructure,
		r.reconcileControlPlane,
		r.reconcileKubeconfig,
		r.reconcileControlPlaneInitialized,
	}

	res := ctrl.Result{}
	errs := []error{}
	for _, phase := range phases {
		// Call the inner reconciliation methods.
		phaseResult, err := phase(ctx, cluster)
		if err != nil {
			errs = append(errs, err)
		}
		if len(errs) > 0 {
			continue
		}
		res = util.LowestNonZeroResult(res, phaseResult)
	}
	return res, kerrors.NewAggregate(errs)
}

// reconcileDelete handles cluster deletion.
func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) {
	log := ctrl.LoggerFrom(ctx)

	// If the RuntimeSDK and ClusterTopology feature flags are enabled, for clusters with managed topologies
	// only proceed with deletion once the cluster is marked as `ok-to-delete`.
	if feature.Gates.Enabled(feature.RuntimeSDK) && feature.Gates.Enabled(feature.ClusterTopology) {
		if cluster.Spec.Topology != nil && !hooks.IsOkToDelete(cluster) {
			return ctrl.Result{}, nil
		}
	}

	descendants, err := r.listDescendants(ctx, cluster)
	if err != nil {
		log.Error(err, "Failed to list descendants")
		return reconcile.Result{}, err
	}

	children, err := descendants.filterOwnedDescendants(cluster)
	if err != nil {
		log.Error(err, "Failed to extract direct descendants")
		return reconcile.Result{}, err
	}

	if len(children) > 0 {
		log.Info("Cluster still has children - deleting them first", "count", len(children))

		var errs []error

		for _, child := range children {
			if !child.GetDeletionTimestamp().IsZero() {
				// Don't handle a child that is already being deleted.
				continue
			}
			gvk := child.GetObjectKind().GroupVersionKind().String()

			log.Info("Deleting child object", "gvk", gvk, "name", child.GetName())
			if err := r.Client.Delete(ctx, child); err != nil {
				err = errors.Wrapf(err, "error deleting cluster %s/%s: failed to delete %s %s", cluster.Namespace, cluster.Name, gvk, child.GetName())
				log.Error(err, "Error deleting resource", "gvk", gvk, "name", child.GetName())
				errs = append(errs, err)
			}
		}

		if len(errs) > 0 {
			return ctrl.Result{}, kerrors.NewAggregate(errs)
		}
	}

	if descendantCount := descendants.length(); descendantCount > 0 {
		indirect := descendantCount - len(children)
		log.Info("Cluster still has descendants - need to requeue", "descendants", descendants.descendantNames(), "indirect descendants count", indirect)
		// Requeue so we can check the next time to see if there are still any descendants left.
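		// Indirect descendants (e.g. Machines owned by a MachineSet rather than by the Cluster itself)
		// are expected to be removed by their owners, so we only wait and check again here.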
		return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
	}

	if cluster.Spec.ControlPlaneRef != nil {
		obj, err := external.Get(ctx, r.UnstructuredCachingClient, cluster.Spec.ControlPlaneRef, cluster.Namespace)
		switch {
		case apierrors.IsNotFound(errors.Cause(err)):
			// All good - the control plane resource has been deleted
			conditions.MarkFalse(cluster, clusterv1.ControlPlaneReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		case err != nil:
			return reconcile.Result{}, errors.Wrapf(err, "failed to get %s %q for Cluster %s/%s",
				path.Join(cluster.Spec.ControlPlaneRef.APIVersion, cluster.Spec.ControlPlaneRef.Kind),
				cluster.Spec.ControlPlaneRef.Name, cluster.Namespace, cluster.Name)
		default:
			// Report a summary of current status of the control plane object defined for this cluster.
			conditions.SetMirror(cluster, clusterv1.ControlPlaneReadyCondition,
				conditions.UnstructuredGetter(obj),
				conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
			)

			// Issue a deletion request for the control plane object.
			// Once it's been deleted, the cluster will get processed again.
			if err := r.Client.Delete(ctx, obj); err != nil {
				return ctrl.Result{}, errors.Wrapf(err,
					"failed to delete %v %q for Cluster %q in namespace %q",
					obj.GroupVersionKind(), obj.GetName(), cluster.Name, cluster.Namespace)
			}

			// Return here so we don't remove the finalizer yet.
			log.Info("Cluster still has descendants - need to requeue", "controlPlaneRef", cluster.Spec.ControlPlaneRef.Name)
			return ctrl.Result{}, nil
		}
	}

	if cluster.Spec.InfrastructureRef != nil {
		obj, err := external.Get(ctx, r.UnstructuredCachingClient, cluster.Spec.InfrastructureRef, cluster.Namespace)
		switch {
		case apierrors.IsNotFound(errors.Cause(err)):
			// All good - the infra resource has been deleted
			conditions.MarkFalse(cluster, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		case err != nil:
			return ctrl.Result{}, errors.Wrapf(err, "failed to get %s %q for Cluster %s/%s",
				path.Join(cluster.Spec.InfrastructureRef.APIVersion, cluster.Spec.InfrastructureRef.Kind),
				cluster.Spec.InfrastructureRef.Name, cluster.Namespace, cluster.Name)
		default:
			// Report a summary of current status of the infrastructure object defined for this cluster.
			conditions.SetMirror(cluster, clusterv1.InfrastructureReadyCondition,
				conditions.UnstructuredGetter(obj),
				conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
			)

			// Issue a deletion request for the infrastructure object.
			// Once it's been deleted, the cluster will get processed again.
			if err := r.Client.Delete(ctx, obj); err != nil {
				return ctrl.Result{}, errors.Wrapf(err,
					"failed to delete %v %q for Cluster %q in namespace %q",
					obj.GroupVersionKind(), obj.GetName(), cluster.Name, cluster.Namespace)
			}

			// Return here so we don't remove the finalizer yet.
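			// Once the infrastructure object is actually gone, a later reconcile hits the NotFound
			// case above and deletion can proceed to removing the finalizer.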
331 log.Info("Cluster still has descendants - need to requeue", "infrastructureRef", cluster.Spec.InfrastructureRef.Name) 332 return ctrl.Result{}, nil 333 } 334 } 335 336 controllerutil.RemoveFinalizer(cluster, clusterv1.ClusterFinalizer) 337 r.recorder.Eventf(cluster, corev1.EventTypeNormal, "Deleted", "Cluster %s has been deleted", cluster.Name) 338 return ctrl.Result{}, nil 339 } 340 341 type clusterDescendants struct { 342 machineDeployments clusterv1.MachineDeploymentList 343 machineSets clusterv1.MachineSetList 344 controlPlaneMachines clusterv1.MachineList 345 workerMachines clusterv1.MachineList 346 machinePools expv1.MachinePoolList 347 } 348 349 // length returns the number of descendants. 350 func (c *clusterDescendants) length() int { 351 return len(c.machineDeployments.Items) + 352 len(c.machineSets.Items) + 353 len(c.controlPlaneMachines.Items) + 354 len(c.workerMachines.Items) + 355 len(c.machinePools.Items) 356 } 357 358 func (c *clusterDescendants) descendantNames() string { 359 descendants := make([]string, 0) 360 controlPlaneMachineNames := make([]string, len(c.controlPlaneMachines.Items)) 361 for i, controlPlaneMachine := range c.controlPlaneMachines.Items { 362 controlPlaneMachineNames[i] = controlPlaneMachine.Name 363 } 364 if len(controlPlaneMachineNames) > 0 { 365 descendants = append(descendants, "Control plane machines: "+strings.Join(controlPlaneMachineNames, ",")) 366 } 367 machineDeploymentNames := make([]string, len(c.machineDeployments.Items)) 368 for i, machineDeployment := range c.machineDeployments.Items { 369 machineDeploymentNames[i] = machineDeployment.Name 370 } 371 if len(machineDeploymentNames) > 0 { 372 descendants = append(descendants, "Machine deployments: "+strings.Join(machineDeploymentNames, ",")) 373 } 374 machineSetNames := make([]string, len(c.machineSets.Items)) 375 for i, machineSet := range c.machineSets.Items { 376 machineSetNames[i] = machineSet.Name 377 } 378 if len(machineSetNames) > 0 { 379 descendants = append(descendants, "Machine sets: "+strings.Join(machineSetNames, ",")) 380 } 381 workerMachineNames := make([]string, len(c.workerMachines.Items)) 382 for i, workerMachine := range c.workerMachines.Items { 383 workerMachineNames[i] = workerMachine.Name 384 } 385 if len(workerMachineNames) > 0 { 386 descendants = append(descendants, "Worker machines: "+strings.Join(workerMachineNames, ",")) 387 } 388 if feature.Gates.Enabled(feature.MachinePool) { 389 machinePoolNames := make([]string, len(c.machinePools.Items)) 390 for i, machinePool := range c.machinePools.Items { 391 machinePoolNames[i] = machinePool.Name 392 } 393 if len(machinePoolNames) > 0 { 394 descendants = append(descendants, "Machine pools: "+strings.Join(machinePoolNames, ",")) 395 } 396 } 397 return strings.Join(descendants, ";") 398 } 399 400 // listDescendants returns a list of all MachineDeployments, MachineSets, MachinePools and Machines for the cluster. 
func (r *Reconciler) listDescendants(ctx context.Context, cluster *clusterv1.Cluster) (clusterDescendants, error) {
	var descendants clusterDescendants

	listOptions := []client.ListOption{
		client.InNamespace(cluster.Namespace),
		client.MatchingLabels(map[string]string{clusterv1.ClusterNameLabel: cluster.Name}),
	}

	if err := r.Client.List(ctx, &descendants.machineDeployments, listOptions...); err != nil {
		return descendants, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", cluster.Namespace, cluster.Name)
	}

	if err := r.Client.List(ctx, &descendants.machineSets, listOptions...); err != nil {
		return descendants, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", cluster.Namespace, cluster.Name)
	}

	if feature.Gates.Enabled(feature.MachinePool) {
		if err := r.Client.List(ctx, &descendants.machinePools, listOptions...); err != nil {
			return descendants, errors.Wrapf(err, "failed to list MachinePools for cluster %s/%s", cluster.Namespace, cluster.Name)
		}
	}
	var machines clusterv1.MachineList
	if err := r.Client.List(ctx, &machines, listOptions...); err != nil {
		return descendants, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", cluster.Namespace, cluster.Name)
	}

	// Split machines into control plane and worker machines so we make sure we delete control plane machines last.
	machineCollection := collections.FromMachineList(&machines)
	controlPlaneMachines := machineCollection.Filter(collections.ControlPlaneMachines(cluster.Name))
	workerMachines := machineCollection.Difference(controlPlaneMachines)
	descendants.workerMachines = collections.ToMachineList(workerMachines)
	// Only count control plane machines as descendants if there is no control plane provider.
	if cluster.Spec.ControlPlaneRef == nil {
		descendants.controlPlaneMachines = collections.ToMachineList(controlPlaneMachines)
	}

	return descendants, nil
}

// filterOwnedDescendants returns a slice of client.Objects containing only those descendants that have the cluster
// as an owner reference, with control plane machines sorted last.
func (c clusterDescendants) filterOwnedDescendants(cluster *clusterv1.Cluster) ([]client.Object, error) {
	var ownedDescendants []client.Object
	eachFunc := func(o runtime.Object) error {
		obj := o.(client.Object)
		acc, err := meta.Accessor(obj)
		if err != nil {
			return nil //nolint:nilerr // We don't want to exit the EachListItem loop, just continue
		}

		if util.IsOwnedByObject(acc, cluster) {
			ownedDescendants = append(ownedDescendants, obj)
		}

		return nil
	}

	lists := []client.ObjectList{
		&c.machineDeployments,
		&c.machineSets,
		&c.workerMachines,
		&c.controlPlaneMachines,
	}
	if feature.Gates.Enabled(feature.MachinePool) {
		lists = append([]client.ObjectList{&c.machinePools}, lists...)
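		// Prepending keeps control plane machines last in the returned slice, as noted in the
		// function comment above.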
	}

	for _, list := range lists {
		if err := meta.EachListItem(list, eachFunc); err != nil {
			return nil, errors.Wrapf(err, "error finding owned descendants of cluster %s/%s", cluster.Namespace, cluster.Name)
		}
	}

	return ownedDescendants, nil
}

// reconcileControlPlaneInitialized marks the ControlPlaneInitialized condition once the first control plane
// machine has a NodeRef, for clusters that do not use a control plane provider.
func (r *Reconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
	log := ctrl.LoggerFrom(ctx)

	// Skip checking if the control plane is initialized when using a Control Plane Provider (this is reconciled in
	// reconcileControlPlane instead).
	if cluster.Spec.ControlPlaneRef != nil {
		log.V(4).Info("Skipping reconcileControlPlaneInitialized because cluster has a controlPlaneRef")
		return ctrl.Result{}, nil
	}

	if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		log.V(4).Info("Skipping reconcileControlPlaneInitialized because control plane already initialized")
		return ctrl.Result{}, nil
	}

	log.V(4).Info("Checking for control plane initialization")

	machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
	if err != nil {
		log.Error(err, "unable to determine ControlPlaneInitialized")
		return ctrl.Result{}, err
	}

	for _, m := range machines {
		if util.IsControlPlaneMachine(m) && m.Status.NodeRef != nil {
			conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition)
			return ctrl.Result{}, nil
		}
	}

	conditions.MarkFalse(cluster, clusterv1.ControlPlaneInitializedCondition, clusterv1.MissingNodeRefReason, clusterv1.ConditionSeverityInfo, "Waiting for the first control plane machine to have its status.nodeRef set")

	return ctrl.Result{}, nil
}

// controlPlaneMachineToCluster is a handler.MapFunc used to enqueue requests for reconciliation
// of the Cluster so it can update its ControlPlaneInitialized condition.
func (r *Reconciler) controlPlaneMachineToCluster(ctx context.Context, o client.Object) []ctrl.Request {
	m, ok := o.(*clusterv1.Machine)
	if !ok {
		panic(fmt.Sprintf("Expected a Machine but got a %T", o))
	}
	if !util.IsControlPlaneMachine(m) {
		return nil
	}
	if m.Status.NodeRef == nil {
		return nil
	}

	cluster, err := util.GetClusterByName(ctx, r.Client, m.Namespace, m.Spec.ClusterName)
	if err != nil {
		return nil
	}

	if conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		return nil
	}

	return []ctrl.Request{{
		NamespacedName: util.ObjectKey(cluster),
	}}
}