sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/topology/cluster/current_state.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cluster 18 19 import ( 20 "context" 21 "fmt" 22 23 "github.com/pkg/errors" 24 corev1 "k8s.io/api/core/v1" 25 apierrors "k8s.io/apimachinery/pkg/api/errors" 26 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 27 "k8s.io/apimachinery/pkg/runtime/schema" 28 "sigs.k8s.io/controller-runtime/pkg/client" 29 30 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 31 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 32 "sigs.k8s.io/cluster-api/exp/topology/scope" 33 "sigs.k8s.io/cluster-api/internal/contract" 34 tlog "sigs.k8s.io/cluster-api/internal/log" 35 "sigs.k8s.io/cluster-api/util/labels" 36 ) 37 38 // getCurrentState gets information about the current state of a Cluster by inspecting the state of the InfrastructureCluster, 39 // the ControlPlane, and the MachineDeployments associated with the Cluster. 40 func (r *Reconciler) getCurrentState(ctx context.Context, s *scope.Scope) (*scope.ClusterState, error) { 41 // NOTE: current scope has been already initialized with the Cluster. 42 currentState := s.Current 43 44 // Reference to the InfrastructureCluster can be nil and is expected to be on the first reconcile. 45 // In this case the method should still be allowed to continue. 46 if currentState.Cluster.Spec.InfrastructureRef != nil { 47 infra, err := r.getCurrentInfrastructureClusterState(ctx, s.Blueprint.InfrastructureClusterTemplate, currentState.Cluster) 48 if err != nil { 49 return nil, err 50 } 51 currentState.InfrastructureCluster = infra 52 } 53 54 // Reference to the ControlPlane can be nil, and is expected to be on the first reconcile. In this case the method 55 // should still be allowed to continue. 56 currentState.ControlPlane = &scope.ControlPlaneState{} 57 if currentState.Cluster.Spec.ControlPlaneRef != nil { 58 cp, err := r.getCurrentControlPlaneState(ctx, s.Blueprint.ControlPlane, s.Blueprint.HasControlPlaneInfrastructureMachine(), currentState.Cluster) 59 if err != nil { 60 return nil, err 61 } 62 currentState.ControlPlane = cp 63 } 64 65 // A Cluster may have zero or more MachineDeployments and a Cluster is expected to have zero MachineDeployments on 66 // first reconcile. 67 md, err := r.getCurrentMachineDeploymentState(ctx, s.Blueprint.MachineDeployments, currentState.Cluster) 68 if err != nil { 69 return nil, err 70 } 71 currentState.MachineDeployments = md 72 73 // A Cluster may have zero or more MachinePools and a Cluster is expected to have zero MachinePools on 74 // first reconcile. 75 mp, err := r.getCurrentMachinePoolState(ctx, s.Blueprint.MachinePools, currentState.Cluster) 76 if err != nil { 77 return nil, err 78 } 79 currentState.MachinePools = mp 80 81 return currentState, nil 82 } 83 84 // getCurrentInfrastructureClusterState looks for the state of the InfrastructureCluster. If a reference is set but not 85 // found, either from an error or the object not being found, an error is thrown. 86 func (r *Reconciler) getCurrentInfrastructureClusterState(ctx context.Context, blueprintInfrastructureClusterTemplate *unstructured.Unstructured, cluster *clusterv1.Cluster) (*unstructured.Unstructured, error) { 87 ref, err := alignRefAPIVersion(blueprintInfrastructureClusterTemplate, cluster.Spec.InfrastructureRef) 88 if err != nil { 89 return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.InfrastructureRef}) 90 } 91 infra, err := r.getReference(ctx, ref) 92 if err != nil { 93 return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.InfrastructureRef}) 94 } 95 // check that the referenced object has the ClusterTopologyOwnedLabel label. 96 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 97 // owned by the topology. 98 if !labels.IsTopologyOwned(infra) { 99 return nil, fmt.Errorf("infra cluster object %s referenced from cluster %s is not topology owned", tlog.KObj{Obj: infra}, tlog.KObj{Obj: cluster}) 100 } 101 return infra, nil 102 } 103 104 // getCurrentControlPlaneState returns information on the ControlPlane being used by the Cluster. If a reference is not found, 105 // an error is thrown. If the ControlPlane requires MachineInfrastructure according to its ClusterClass an error will be 106 // thrown if the ControlPlane has no MachineTemplates. 107 func (r *Reconciler) getCurrentControlPlaneState(ctx context.Context, blueprintControlPlane *scope.ControlPlaneBlueprint, blueprintHasControlPlaneInfrastructureMachine bool, cluster *clusterv1.Cluster) (*scope.ControlPlaneState, error) { 108 var err error 109 res := &scope.ControlPlaneState{} 110 111 // Get the control plane object. 112 ref, err := alignRefAPIVersion(blueprintControlPlane.Template, cluster.Spec.ControlPlaneRef) 113 if err != nil { 114 return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.ControlPlaneRef}) 115 } 116 res.Object, err = r.getReference(ctx, ref) 117 if err != nil { 118 return nil, errors.Wrapf(err, "failed to read %s", tlog.KRef{Ref: cluster.Spec.ControlPlaneRef}) 119 } 120 // check that the referenced object has the ClusterTopologyOwnedLabel label. 121 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 122 // owned by the topology. 123 if !labels.IsTopologyOwned(res.Object) { 124 return nil, fmt.Errorf("control plane object %s referenced from cluster %s is not topology owned", tlog.KObj{Obj: res.Object}, tlog.KObj{Obj: cluster}) 125 } 126 127 // If the clusterClass does not mandate the controlPlane has infrastructureMachines, return. 128 if !blueprintHasControlPlaneInfrastructureMachine { 129 return res, nil 130 } 131 132 // Otherwise, get the control plane machine infrastructureMachine template. 133 machineInfrastructureRef, err := contract.ControlPlane().MachineTemplate().InfrastructureRef().Get(res.Object) 134 if err != nil { 135 return res, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate reference for %s", tlog.KObj{Obj: res.Object}) 136 } 137 ref, err = alignRefAPIVersion(blueprintControlPlane.InfrastructureMachineTemplate, machineInfrastructureRef) 138 if err != nil { 139 return nil, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate for %s", tlog.KObj{Obj: res.Object}) 140 } 141 res.InfrastructureMachineTemplate, err = r.getReference(ctx, ref) 142 if err != nil { 143 return nil, errors.Wrapf(err, "failed to get InfrastructureMachineTemplate for %s", tlog.KObj{Obj: res.Object}) 144 } 145 // check that the referenced object has the ClusterTopologyOwnedLabel label. 146 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 147 // owned by the topology. 148 if !labels.IsTopologyOwned(res.InfrastructureMachineTemplate) { 149 return nil, fmt.Errorf("control plane InfrastructureMachineTemplate object %s referenced from cluster %s is not topology owned", tlog.KObj{Obj: res.InfrastructureMachineTemplate}, tlog.KObj{Obj: cluster}) 150 } 151 152 mhc := &clusterv1.MachineHealthCheck{} 153 // MachineHealthCheck always has the same name and namespace as the ControlPlane object it belongs to. 154 if err := r.Client.Get(ctx, client.ObjectKey{Namespace: res.Object.GetNamespace(), Name: res.Object.GetName()}, mhc); err != nil { 155 // Not every ControlPlane will have an associated MachineHealthCheck. If no MachineHealthCheck is found return without error. 156 if apierrors.IsNotFound(err) { 157 return res, nil 158 } 159 return nil, errors.Wrapf(err, "failed to get MachineHealthCheck for %s", tlog.KObj{Obj: res.Object}) 160 } 161 res.MachineHealthCheck = mhc 162 return res, nil 163 } 164 165 // getCurrentMachineDeploymentState queries for all MachineDeployments and filters them for their linked Cluster and 166 // whether they are managed by a ClusterClass using labels. A Cluster may have zero or more MachineDeployments. Zero is 167 // expected on first reconcile. If MachineDeployments are found for the Cluster their Infrastructure and Bootstrap references 168 // are inspected. Where these are not found the function will throw an error. 169 func (r *Reconciler) getCurrentMachineDeploymentState(ctx context.Context, blueprintMachineDeployments map[string]*scope.MachineDeploymentBlueprint, cluster *clusterv1.Cluster) (map[string]*scope.MachineDeploymentState, error) { 170 state := make(scope.MachineDeploymentsStateMap) 171 172 // List all the machine deployments in the current cluster and in a managed topology. 173 // Note: This is a cached list call. We ensure in reconcile_state that the cache is up-to-date 174 // after we create/update a MachineDeployment and we double-check if an MD already exists before 175 // we create it. 176 md := &clusterv1.MachineDeploymentList{} 177 err := r.Client.List(ctx, md, 178 client.MatchingLabels{ 179 clusterv1.ClusterNameLabel: cluster.Name, 180 clusterv1.ClusterTopologyOwnedLabel: "", 181 }, 182 client.InNamespace(cluster.Namespace), 183 ) 184 if err != nil { 185 return nil, errors.Wrap(err, "failed to read MachineDeployments for managed topology") 186 } 187 188 // Loop over each machine deployment and create the current 189 // state by retrieving all required references. 190 for i := range md.Items { 191 m := &md.Items[i] 192 193 // Retrieve the name which is assigned in Cluster's topology 194 // from a well-defined label. 195 mdTopologyName, ok := m.ObjectMeta.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] 196 if !ok || mdTopologyName == "" { 197 return nil, fmt.Errorf("failed to find label %s in %s", clusterv1.ClusterTopologyMachineDeploymentNameLabel, tlog.KObj{Obj: m}) 198 } 199 200 // Make sure that the name of the MachineDeployment stays unique. 201 // If we've already seen a MachineDeployment with the same name 202 // this is an error, probably caused from manual modifications or a race condition. 203 if _, ok := state[mdTopologyName]; ok { 204 return nil, fmt.Errorf("duplicate %s found for label %s: %s", tlog.KObj{Obj: m}, clusterv1.ClusterTopologyMachineDeploymentNameLabel, mdTopologyName) 205 } 206 207 // Gets the bootstrapRef. 208 bootstrapRef := m.Spec.Template.Spec.Bootstrap.ConfigRef 209 if bootstrapRef == nil { 210 return nil, fmt.Errorf("%s does not have a reference to a Bootstrap Config", tlog.KObj{Obj: m}) 211 } 212 // Gets the infraRef. 213 infraRef := &m.Spec.Template.Spec.InfrastructureRef 214 if infraRef.Name == "" { 215 return nil, fmt.Errorf("%s does not have a reference to a InfrastructureMachineTemplate", tlog.KObj{Obj: m}) 216 } 217 218 // If the mdTopology exists in the Cluster, lookup the corresponding mdBluePrint and align 219 // the apiVersions in the bootstrapRef and infraRef. 220 // If the mdTopology doesn't exist, do nothing (this can happen if the mdTopology was deleted). 221 // **Note** We can't check if the MachineDeployment has a DeletionTimestamp, because at this point it could not be set yet. 222 if mdTopologyExistsInCluster, mdClassName := getMDClassName(cluster, mdTopologyName); mdTopologyExistsInCluster { 223 mdBluePrint, ok := blueprintMachineDeployments[mdClassName] 224 if !ok { 225 return nil, fmt.Errorf("failed to find MachineDeployment class %s in ClusterClass", mdClassName) 226 } 227 bootstrapRef, err = alignRefAPIVersion(mdBluePrint.BootstrapTemplate, bootstrapRef) 228 if err != nil { 229 return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) 230 } 231 infraRef, err = alignRefAPIVersion(mdBluePrint.InfrastructureMachineTemplate, infraRef) 232 if err != nil { 233 return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) 234 } 235 } 236 237 // Get the BootstrapTemplate. 238 bootstrapTemplate, err := r.getReference(ctx, bootstrapRef) 239 if err != nil { 240 return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) 241 } 242 // check that the referenced object has the ClusterTopologyOwnedLabel label. 243 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 244 // owned by the topology. 245 if !labels.IsTopologyOwned(bootstrapTemplate) { 246 return nil, fmt.Errorf("BootstrapTemplate object %s referenced from MD %s is not topology owned", tlog.KObj{Obj: bootstrapTemplate}, tlog.KObj{Obj: m}) 247 } 248 249 // Get the InfraMachineTemplate. 250 infraMachineTemplate, err := r.getReference(ctx, infraRef) 251 if err != nil { 252 return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) 253 } 254 // check that the referenced object has the ClusterTopologyOwnedLabel label. 255 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 256 // owned by the topology. 257 if !labels.IsTopologyOwned(infraMachineTemplate) { 258 return nil, fmt.Errorf("InfrastructureMachineTemplate object %s referenced from MD %s is not topology owned", tlog.KObj{Obj: infraMachineTemplate}, tlog.KObj{Obj: m}) 259 } 260 261 // Gets the MachineHealthCheck. 262 mhc := &clusterv1.MachineHealthCheck{} 263 // MachineHealthCheck always has the same name and namespace as the MachineDeployment it belongs to. 264 if err := r.Client.Get(ctx, client.ObjectKey{Namespace: m.Namespace, Name: m.Name}, mhc); err != nil { 265 // reset the machineHealthCheck to nil if there is an error. 266 mhc = nil 267 268 // Each MachineDeployment isn't required to have a MachineHealthCheck. Ignore the error if it's of the type not found, but return any other error. 269 if !apierrors.IsNotFound(err) { 270 return nil, errors.Wrap(err, fmt.Sprintf("failed to get MachineHealthCheck for %s", tlog.KObj{Obj: m})) 271 } 272 } 273 274 state[mdTopologyName] = &scope.MachineDeploymentState{ 275 Object: m, 276 BootstrapTemplate: bootstrapTemplate, 277 InfrastructureMachineTemplate: infraMachineTemplate, 278 MachineHealthCheck: mhc, 279 } 280 } 281 return state, nil 282 } 283 284 // getCurrentMachinePoolState queries for all MachinePools and filters them for their linked Cluster and 285 // whether they are managed by a ClusterClass using labels. A Cluster may have zero or more MachinePools. Zero is 286 // expected on first reconcile. If MachinePools are found for the Cluster their Infrastructure and Bootstrap references 287 // are inspected. Where these are not found the function will throw an error. 288 func (r *Reconciler) getCurrentMachinePoolState(ctx context.Context, blueprintMachinePools map[string]*scope.MachinePoolBlueprint, cluster *clusterv1.Cluster) (map[string]*scope.MachinePoolState, error) { 289 state := make(scope.MachinePoolsStateMap) 290 291 // List all the machine pools in the current cluster and in a managed topology. 292 // Note: This is a cached list call. We ensure in reconcile_state that the cache is up-to-date 293 // after we create/update a MachinePool and we double-check if an MP already exists before 294 // we create it. 295 mp := &expv1.MachinePoolList{} 296 err := r.Client.List(ctx, mp, 297 client.MatchingLabels{ 298 clusterv1.ClusterNameLabel: cluster.Name, 299 clusterv1.ClusterTopologyOwnedLabel: "", 300 }, 301 client.InNamespace(cluster.Namespace), 302 ) 303 if err != nil { 304 return nil, errors.Wrap(err, "failed to read MachinePools for managed topology") 305 } 306 307 // Loop over each machine pool and create the current 308 // state by retrieving all required references. 309 for i := range mp.Items { 310 m := &mp.Items[i] 311 312 // Retrieve the name which is assigned in Cluster's topology 313 // from a well-defined label. 314 mpTopologyName, ok := m.ObjectMeta.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel] 315 if !ok || mpTopologyName == "" { 316 return nil, fmt.Errorf("failed to find label %s in %s", clusterv1.ClusterTopologyMachinePoolNameLabel, tlog.KObj{Obj: m}) 317 } 318 319 // Make sure that the name of the MachinePool stays unique. 320 // If we've already seen a MachinePool with the same name 321 // this is an error, probably caused from manual modifications or a race condition. 322 if _, ok := state[mpTopologyName]; ok { 323 return nil, fmt.Errorf("duplicate %s found for label %s: %s", tlog.KObj{Obj: m}, clusterv1.ClusterTopologyMachinePoolNameLabel, mpTopologyName) 324 } 325 326 // Gets the bootstrapRef. 327 bootstrapRef := m.Spec.Template.Spec.Bootstrap.ConfigRef 328 if bootstrapRef == nil { 329 return nil, fmt.Errorf("%s does not have a reference to a Bootstrap Config", tlog.KObj{Obj: m}) 330 } 331 // Gets the infraRef. 332 infraRef := &m.Spec.Template.Spec.InfrastructureRef 333 if infraRef.Name == "" { 334 return nil, fmt.Errorf("%s does not have a reference to a InfrastructureMachinePool", tlog.KObj{Obj: m}) 335 } 336 337 // If the mpTopology exists in the Cluster, lookup the corresponding mpBluePrint and align 338 // the apiVersions in the bootstrapRef and infraRef. 339 // If the mpTopology doesn't exist, do nothing (this can happen if the mpTopology was deleted). 340 // **Note** We can't check if the MachinePool has a DeletionTimestamp, because at this point it could not be set yet. 341 if mpTopologyExistsInCluster, mpClassName := getMPClassName(cluster, mpTopologyName); mpTopologyExistsInCluster { 342 mpBluePrint, ok := blueprintMachinePools[mpClassName] 343 if !ok { 344 return nil, fmt.Errorf("failed to find MachinePool class %s in ClusterClass", mpClassName) 345 } 346 bootstrapRef, err = alignRefAPIVersion(mpBluePrint.BootstrapTemplate, bootstrapRef) 347 if err != nil { 348 return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) 349 } 350 infraRef, err = alignRefAPIVersion(mpBluePrint.InfrastructureMachinePoolTemplate, infraRef) 351 if err != nil { 352 return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) 353 } 354 } 355 356 // Get the BootstrapObject 357 bootstrapObject, err := r.getReference(ctx, bootstrapRef) 358 if err != nil { 359 return nil, errors.Wrap(err, fmt.Sprintf("%s Bootstrap reference could not be retrieved", tlog.KObj{Obj: m})) 360 } 361 // check that the referenced object has the ClusterTopologyOwnedLabel label. 362 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 363 // owned by the topology. 364 if !labels.IsTopologyOwned(bootstrapObject) { 365 return nil, fmt.Errorf("bootstrap object %s referenced from MP %s is not topology owned", tlog.KObj{Obj: bootstrapObject}, tlog.KObj{Obj: m}) 366 } 367 368 // Get the InfraMachinePoolObject. 369 infraMachinePoolObject, err := r.getReference(ctx, infraRef) 370 if err != nil { 371 return nil, errors.Wrap(err, fmt.Sprintf("%s Infrastructure reference could not be retrieved", tlog.KObj{Obj: m})) 372 } 373 // check that the referenced object has the ClusterTopologyOwnedLabel label. 374 // Nb. This is to make sure that a managed topology cluster does not have a reference to an object that is not 375 // owned by the topology. 376 if !labels.IsTopologyOwned(infraMachinePoolObject) { 377 return nil, fmt.Errorf("InfrastructureMachinePool object %s referenced from MP %s is not topology owned", tlog.KObj{Obj: infraMachinePoolObject}, tlog.KObj{Obj: m}) 378 } 379 380 state[mpTopologyName] = &scope.MachinePoolState{ 381 Object: m, 382 BootstrapObject: bootstrapObject, 383 InfrastructureMachinePoolObject: infraMachinePoolObject, 384 } 385 } 386 return state, nil 387 } 388 389 // alignRefAPIVersion returns an aligned copy of the currentRef so it matches the apiVersion in ClusterClass. 390 // This is required so the topology controller can diff current and desired state objects of the same 391 // version during reconcile. 392 // If group or kind was changed in the ClusterClass, an exact copy of the currentRef is returned because 393 // it will end up in a diff and a rollout anyway. 394 // Only bootstrap template refs in a ClusterClass can change their group and kind. 395 func alignRefAPIVersion(templateFromClusterClass *unstructured.Unstructured, currentRef *corev1.ObjectReference) (*corev1.ObjectReference, error) { 396 currentGV, err := schema.ParseGroupVersion(currentRef.APIVersion) 397 if err != nil { 398 return nil, errors.Wrapf(err, "failed to parse apiVersion: %q", currentRef.APIVersion) 399 } 400 401 apiVersion := currentRef.APIVersion 402 // Use apiVersion from ClusterClass if group and kind is the same. 403 if templateFromClusterClass.GroupVersionKind().Group == currentGV.Group && 404 templateFromClusterClass.GetKind() == currentRef.Kind { 405 apiVersion = templateFromClusterClass.GetAPIVersion() 406 } 407 408 return &corev1.ObjectReference{ 409 APIVersion: apiVersion, 410 Kind: currentRef.Kind, 411 Namespace: currentRef.Namespace, 412 Name: currentRef.Name, 413 }, nil 414 } 415 416 // getMDClassName retrieves the MDClass name by looking up the MDTopology in the Cluster. 417 func getMDClassName(cluster *clusterv1.Cluster, mdTopologyName string) (bool, string) { 418 if cluster.Spec.Topology.Workers == nil { 419 return false, "" 420 } 421 422 for _, mdTopology := range cluster.Spec.Topology.Workers.MachineDeployments { 423 if mdTopology.Name == mdTopologyName { 424 return true, mdTopology.Class 425 } 426 } 427 return false, "" 428 } 429 430 // getMPClassName retrieves the MPClass name by looking up the MPTopology in the Cluster. 431 func getMPClassName(cluster *clusterv1.Cluster, mpTopologyName string) (bool, string) { 432 if cluster.Spec.Topology.Workers == nil { 433 return false, "" 434 } 435 436 for _, mpTopology := range cluster.Spec.Topology.Workers.MachinePools { 437 if mpTopology.Name == mpTopologyName { 438 return true, mpTopology.Class 439 } 440 } 441 return false, "" 442 }