sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/topology/cluster/cluster_controller.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cluster

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/types"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/client-go/tools/record"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/controllers/remote"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog"
	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
	"sigs.k8s.io/cluster-api/exp/topology/desiredstate"
	"sigs.k8s.io/cluster-api/exp/topology/scope"
	"sigs.k8s.io/cluster-api/feature"
	"sigs.k8s.io/cluster-api/internal/controllers/topology/cluster/structuredmerge"
	"sigs.k8s.io/cluster-api/internal/hooks"
	tlog "sigs.k8s.io/cluster-api/internal/log"
	runtimeclient "sigs.k8s.io/cluster-api/internal/runtime/client"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/internal/webhooks"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io;controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinehealthchecks,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;delete

// Reconciler reconciles a managed topology for a Cluster object.
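// It is set up via SetupWithManager for regular operation and via SetupForDryRun for dry run execution.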
type Reconciler struct {
	Client  client.Client
	Tracker *remote.ClusterCacheTracker
	// APIReader is used to list MachineSets directly via the API server to avoid
	// race conditions caused by an outdated cache.
	APIReader client.Reader

	RuntimeClient runtimeclient.Client

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

	// UnstructuredCachingClient provides a client that forces caching of unstructured objects,
	// thus allowing reads for templates or provider-specific objects in a managed topology to be optimized.
	UnstructuredCachingClient client.Client

	externalTracker external.ObjectTracker
	recorder        record.EventRecorder

	// desiredStateGenerator is used to generate the desired state.
	desiredStateGenerator desiredstate.Generator

	patchHelperFactory structuredmerge.PatchHelperFactoryFunc
}

func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&clusterv1.Cluster{}, builder.WithPredicates(
			// Only reconcile Clusters with a topology.
			predicates.ClusterHasTopology(ctrl.LoggerFrom(ctx)),
		)).
		Named("topology/cluster").
		Watches(
			&clusterv1.ClusterClass{},
			handler.EnqueueRequestsFromMapFunc(r.clusterClassToCluster),
		).
		Watches(
			&clusterv1.MachineDeployment{},
			handler.EnqueueRequestsFromMapFunc(r.machineDeploymentToCluster),
			// Only trigger Cluster reconciliation if the MachineDeployment is topology owned.
			builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
		).
		Watches(
			&expv1.MachinePool{},
			handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster),
			// Only trigger Cluster reconciliation if the MachinePool is topology owned.
			builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
		).
		WithOptions(options).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Build(r)

	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	r.desiredStateGenerator = desiredstate.NewGenerator(r.Client, r.Tracker, r.RuntimeClient)
	r.recorder = mgr.GetEventRecorderFor("topology/cluster-controller")
	if r.patchHelperFactory == nil {
		r.patchHelperFactory = serverSideApplyPatchHelperFactory(r.Client, ssa.NewCache())
	}
	return nil
}

// SetupForDryRun prepares the Reconciler for a dry run execution.
func (r *Reconciler) SetupForDryRun(recorder record.EventRecorder) {
	r.desiredStateGenerator = desiredstate.NewGenerator(r.Client, r.Tracker, r.RuntimeClient)
	r.recorder = recorder
	r.patchHelperFactory = dryRunPatchHelperFactory(r.Client)
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	log := ctrl.LoggerFrom(ctx)

	// Fetch the Cluster instance.
	cluster := &clusterv1.Cluster{}
	if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil {
		if apierrors.IsNotFound(err) {
			return ctrl.Result{}, nil
		}
		// Error reading the object - requeue the request.
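		// Returning the error makes controller-runtime requeue the request with exponential backoff.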
		return ctrl.Result{}, err
	}
	cluster.APIVersion = clusterv1.GroupVersion.String()
	cluster.Kind = "Cluster"

	// Return early if the Cluster does not use a managed topology.
	// NOTE: We're already filtering events, but this is a safeguard for cases like e.g. when
	// there are MachineDeployments which have the topology owned label, but the corresponding
	// cluster is not topology owned.
	if cluster.Spec.Topology == nil {
		return ctrl.Result{}, nil
	}

	// Return early if the Cluster is paused.
	// TODO: What should we do if the cluster class is paused?
	if annotations.IsPaused(cluster, cluster) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	patchHelper, err := patch.NewHelper(cluster, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	// Create a scope initialized with only the cluster; during reconcile
	// additional information will be added about the Cluster blueprint, current state and desired state.
	s := scope.New(cluster)

	defer func() {
		if err := r.reconcileConditions(s, cluster, reterr); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, errors.Wrap(err, "failed to reconcile cluster topology conditions")})
			return
		}
		options := []patch.Option{
			patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
				clusterv1.TopologyReconciledCondition,
			}},
			patch.WithForceOverwriteConditions{},
		}
		if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
			return
		}
	}()

	// In case the object is deleted, the managed topology stops reconciling;
	// the other controllers will take care of deletion.
	if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
		return r.reconcileDelete(ctx, cluster)
	}

	// Handle the normal reconciliation loop.
	result, err := r.reconcile(ctx, s)
	if err != nil {
		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
		// the current cluster because of concurrent access.
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{RequeueAfter: time.Minute}, nil
		}
	}
	return result, err
}

// reconcile handles cluster reconciliation.
func (r *Reconciler) reconcile(ctx context.Context, s *scope.Scope) (ctrl.Result, error) {
	var err error

	// Get the ClusterClass.
	clusterClass := &clusterv1.ClusterClass{}
	key := client.ObjectKey{Name: s.Current.Cluster.Spec.Topology.Class, Namespace: s.Current.Cluster.Namespace}
	if err := r.Client.Get(ctx, key, clusterClass); err != nil {
		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve ClusterClass %s", s.Current.Cluster.Spec.Topology.Class)
	}

	s.Blueprint.ClusterClass = clusterClass
	// If the ClusterClass `metadata.Generation` doesn't match `status.ObservedGeneration`, return early as the ClusterClass
	// is not up to date.
	// Note: This doesn't require a requeue, as a change to the ClusterClass observedGeneration will cause an additional
	// reconcile of the Cluster.
	if clusterClass.GetGeneration() != clusterClass.Status.ObservedGeneration {
		return ctrl.Result{}, nil
	}

	// Default and validate the Cluster variables based on information from the ClusterClass.
	// This step is needed because, if the ClusterClass did not exist at Cluster creation time, some fields may not
	// have been defaulted or validated by the webhook.
	if errs := webhooks.DefaultAndValidateVariables(s.Current.Cluster, clusterClass); len(errs) > 0 {
		return ctrl.Result{}, apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), s.Current.Cluster.Name, errs)
	}

	// Get the blueprint with the ClusterClass and the referenced templates
	// and store it in the request scope.
	s.Blueprint, err = r.getBlueprint(ctx, s.Current.Cluster, s.Blueprint.ClusterClass)
	if err != nil {
		return ctrl.Result{}, errors.Wrap(err, "error reading the ClusterClass")
	}

	// Get the current state of the Cluster and store it in the request scope.
	s.Current, err = r.getCurrentState(ctx, s)
	if err != nil {
		return ctrl.Result{}, errors.Wrap(err, "error reading current state of the Cluster topology")
	}

	// The cluster topology is yet to be created. Call the BeforeClusterCreate hook before proceeding.
	if feature.Gates.Enabled(feature.RuntimeSDK) {
		res, err := r.callBeforeClusterCreateHook(ctx, s)
		if err != nil {
			return reconcile.Result{}, err
		}
		if !res.IsZero() {
			return res, nil
		}
	}

	// Set up watches for the InfrastructureCluster and ControlPlane CRs when they exist.
	if err := r.setupDynamicWatches(ctx, s); err != nil {
		return ctrl.Result{}, errors.Wrap(err, "error creating dynamic watch")
	}

	// Compute the desired state of the Cluster and store it in the request scope.
	s.Desired, err = r.desiredStateGenerator.Generate(ctx, s)
	if err != nil {
		return ctrl.Result{}, errors.Wrap(err, "error computing the desired state of the Cluster topology")
	}

	// Reconcile the current and desired state of the Cluster.
	if err := r.reconcileState(ctx, s); err != nil {
		return ctrl.Result{}, errors.Wrap(err, "error reconciling the Cluster topology")
	}

	// requeueAfter will not be 0 if any of the runtime hooks returns a blocking response.
	requeueAfter := s.HookResponseTracker.AggregateRetryAfter()
	if requeueAfter != 0 {
		return ctrl.Result{RequeueAfter: requeueAfter}, nil
	}

	return ctrl.Result{}, nil
}

// setupDynamicWatches creates watches for the InfrastructureCluster and ControlPlane CRs when they exist.
func (r *Reconciler) setupDynamicWatches(ctx context.Context, s *scope.Scope) error {
	if s.Current.InfrastructureCluster != nil {
		if err := r.externalTracker.Watch(ctrl.LoggerFrom(ctx), s.Current.InfrastructureCluster,
			handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Cluster{}),
			// Only trigger Cluster reconciliation if the InfrastructureCluster is topology owned.
			predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))); err != nil {
			return errors.Wrap(err, "error watching Infrastructure CR")
		}
	}
	if s.Current.ControlPlane.Object != nil {
		if err := r.externalTracker.Watch(ctrl.LoggerFrom(ctx), s.Current.ControlPlane.Object,
			handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Cluster{}),
			// Only trigger Cluster reconciliation if the ControlPlane is topology owned.
			predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))); err != nil {
			return errors.Wrap(err, "error watching ControlPlane CR")
		}
	}
	return nil
}

func (r *Reconciler) callBeforeClusterCreateHook(ctx context.Context, s *scope.Scope) (reconcile.Result, error) {
	// If the cluster objects (InfraCluster, ControlPlane, etc.) are not yet created, we are in the creation phase.
	// Call the BeforeClusterCreate hook before proceeding.
	log := tlog.LoggerFrom(ctx)
	if s.Current.Cluster.Spec.InfrastructureRef == nil && s.Current.Cluster.Spec.ControlPlaneRef == nil {
		hookRequest := &runtimehooksv1.BeforeClusterCreateRequest{
			Cluster: *s.Current.Cluster,
		}
		hookResponse := &runtimehooksv1.BeforeClusterCreateResponse{}
		if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.BeforeClusterCreate, s.Current.Cluster, hookRequest, hookResponse); err != nil {
			return ctrl.Result{}, err
		}
		s.HookResponseTracker.Add(runtimehooksv1.BeforeClusterCreate, hookResponse)
		if hookResponse.RetryAfterSeconds != 0 {
			log.Infof("Creation of Cluster topology is blocked by %s hook", runtimecatalog.HookName(runtimehooksv1.BeforeClusterCreate))
			return ctrl.Result{RequeueAfter: time.Duration(hookResponse.RetryAfterSeconds) * time.Second}, nil
		}
	}
	return ctrl.Result{}, nil
}

// clusterClassToCluster is a handler.MapFunc used to enqueue reconcile requests
// for Clusters when their ClusterClass gets updated.
func (r *Reconciler) clusterClassToCluster(ctx context.Context, o client.Object) []ctrl.Request {
	clusterClass, ok := o.(*clusterv1.ClusterClass)
	if !ok {
		panic(fmt.Sprintf("Expected a ClusterClass but got a %T", o))
	}

	clusterList := &clusterv1.ClusterList{}
	if err := r.Client.List(
		ctx,
		clusterList,
		client.MatchingFields{index.ClusterClassNameField: clusterClass.Name},
		client.InNamespace(clusterClass.Namespace),
	); err != nil {
		return nil
	}

	// There can be more than one Cluster using the same ClusterClass.
	// Create a request for each of them.
	requests := []ctrl.Request{}
	for i := range clusterList.Items {
		requests = append(requests, ctrl.Request{NamespacedName: util.ObjectKey(&clusterList.Items[i])})
	}
	return requests
}

// machineDeploymentToCluster is a handler.MapFunc used to enqueue reconcile requests
// for a Cluster when one of its own MachineDeployments gets updated.
func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Object) []ctrl.Request {
	md, ok := o.(*clusterv1.MachineDeployment)
	if !ok {
		panic(fmt.Sprintf("Expected a MachineDeployment but got a %T", o))
	}
	if md.Spec.ClusterName == "" {
		return nil
	}

	return []ctrl.Request{{
		NamespacedName: types.NamespacedName{
			Namespace: md.Namespace,
			Name:      md.Spec.ClusterName,
		},
	}}
}

// machinePoolToCluster is a handler.MapFunc used to enqueue reconcile requests
// for a Cluster when one of its own MachinePools gets updated.
func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request {
	mp, ok := o.(*expv1.MachinePool)
	if !ok {
		panic(fmt.Sprintf("Expected a MachinePool but got a %T", o))
	}
	if mp.Spec.ClusterName == "" {
		return nil
	}

	return []ctrl.Request{{
		NamespacedName: types.NamespacedName{
			Namespace: mp.Namespace,
			Name:      mp.Spec.ClusterName,
		},
	}}
}

func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
	// Call the BeforeClusterDelete hook if the 'ok-to-delete' annotation is not set
	// and add the annotation to the cluster after receiving a successful non-blocking response.
	log := tlog.LoggerFrom(ctx)
	if feature.Gates.Enabled(feature.RuntimeSDK) {
		if !hooks.IsOkToDelete(cluster) {
			hookRequest := &runtimehooksv1.BeforeClusterDeleteRequest{
				Cluster: *cluster,
			}
			hookResponse := &runtimehooksv1.BeforeClusterDeleteResponse{}
			if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.BeforeClusterDelete, cluster, hookRequest, hookResponse); err != nil {
				return ctrl.Result{}, err
			}
			if hookResponse.RetryAfterSeconds != 0 {
				log.Infof("Cluster deletion is blocked by %q hook", runtimecatalog.HookName(runtimehooksv1.BeforeClusterDelete))
				return ctrl.Result{RequeueAfter: time.Duration(hookResponse.RetryAfterSeconds) * time.Second}, nil
			}
			// The BeforeClusterDelete hook returned a non-blocking response. The cluster is now ready to be deleted,
			// so let's mark it as `ok-to-delete`.
			if err := hooks.MarkAsOkToDelete(ctx, r.Client, cluster); err != nil {
				return ctrl.Result{}, err
			}
		}
	}
	return ctrl.Result{}, nil
}

// serverSideApplyPatchHelperFactory makes use of managed fields provided by server side apply and is used by the controller.
func serverSideApplyPatchHelperFactory(c client.Client, ssaCache ssa.Cache) structuredmerge.PatchHelperFactoryFunc {
	return func(ctx context.Context, original, modified client.Object, opts ...structuredmerge.HelperOption) (structuredmerge.PatchHelper, error) {
		return structuredmerge.NewServerSidePatchHelper(ctx, original, modified, c, ssaCache, opts...)
	}
}

// dryRunPatchHelperFactory makes use of a two-way patch and is used in situations where we cannot rely on managed fields.
func dryRunPatchHelperFactory(c client.Client) structuredmerge.PatchHelperFactoryFunc {
	return func(_ context.Context, original, modified client.Object, opts ...structuredmerge.HelperOption) (structuredmerge.PatchHelper, error) {
		return structuredmerge.NewTwoWaysPatchHelper(original, modified, c, opts...)
	}
}
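
// setupTopologyClusterController is an illustrative sketch added for documentation purposes only;
// it is not part of the upstream file. It shows one way a controller-runtime manager could wire up
// the Reconciler via SetupWithManager. The tracker, runtime client, caching client, filter value
// and concurrency settings are assumptions supplied by a hypothetical caller.
func setupTopologyClusterController(ctx context.Context, mgr ctrl.Manager, tracker *remote.ClusterCacheTracker,
	runtimeClient runtimeclient.Client, unstructuredCachingClient client.Client, watchFilterValue string) error {
	r := &Reconciler{
		Client:                    mgr.GetClient(),
		APIReader:                 mgr.GetAPIReader(),
		Tracker:                   tracker,
		RuntimeClient:             runtimeClient,
		UnstructuredCachingClient: unstructuredCachingClient,
		WatchFilterValue:          watchFilterValue,
	}
	// MaxConcurrentReconciles is an arbitrary example value.
	return r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10})
}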