sigs.k8s.io/cluster-api@v1.7.1/internal/webhooks/cluster.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package webhooks 18 19 import ( 20 "context" 21 "fmt" 22 "net" 23 "strconv" 24 "strings" 25 "time" 26 27 "github.com/blang/semver/v4" 28 "github.com/pkg/errors" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 "k8s.io/apimachinery/pkg/runtime" 31 kerrors "k8s.io/apimachinery/pkg/util/errors" 32 "k8s.io/apimachinery/pkg/util/validation" 33 "k8s.io/apimachinery/pkg/util/validation/field" 34 "k8s.io/apimachinery/pkg/util/wait" 35 "k8s.io/klog/v2" 36 ctrl "sigs.k8s.io/controller-runtime" 37 "sigs.k8s.io/controller-runtime/pkg/client" 38 "sigs.k8s.io/controller-runtime/pkg/webhook" 39 "sigs.k8s.io/controller-runtime/pkg/webhook/admission" 40 41 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 42 "sigs.k8s.io/cluster-api/controllers/external" 43 expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" 44 "sigs.k8s.io/cluster-api/feature" 45 "sigs.k8s.io/cluster-api/internal/contract" 46 "sigs.k8s.io/cluster-api/internal/topology/check" 47 "sigs.k8s.io/cluster-api/internal/topology/variables" 48 "sigs.k8s.io/cluster-api/util/conditions" 49 "sigs.k8s.io/cluster-api/util/version" 50 ) 51 52 // SetupWebhookWithManager sets up Cluster webhooks. 53 func (webhook *Cluster) SetupWebhookWithManager(mgr ctrl.Manager) error { 54 return ctrl.NewWebhookManagedBy(mgr). 55 For(&clusterv1.Cluster{}). 56 WithDefaulter(webhook). 57 WithValidator(webhook). 58 Complete() 59 } 60 61 // +kubebuilder:webhook:verbs=create;update;delete,path=/validate-cluster-x-k8s-io-v1beta1-cluster,mutating=false,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=clusters,versions=v1beta1,name=validation.cluster.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 62 // +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-cluster,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=clusters,versions=v1beta1,name=default.cluster.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 63 64 // ClusterCacheTrackerReader is a scoped-down interface from ClusterCacheTracker that only allows to get a reader client. 65 type ClusterCacheTrackerReader interface { 66 GetReader(ctx context.Context, cluster client.ObjectKey) (client.Reader, error) 67 } 68 69 // Cluster implements a validating and defaulting webhook for Cluster. 70 type Cluster struct { 71 Client client.Reader 72 Tracker ClusterCacheTrackerReader 73 } 74 75 var _ webhook.CustomDefaulter = &Cluster{} 76 var _ webhook.CustomValidator = &Cluster{} 77 78 var errClusterClassNotReconciled = errors.New("ClusterClass is not up to date") 79 80 // Default satisfies the defaulting webhook interface. 81 func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error { 82 // We gather all defaulting errors and return them together. 83 var allErrs field.ErrorList 84 85 cluster, ok := obj.(*clusterv1.Cluster) 86 if !ok { 87 return apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", obj)) 88 } 89 90 if cluster.Spec.InfrastructureRef != nil && cluster.Spec.InfrastructureRef.Namespace == "" { 91 cluster.Spec.InfrastructureRef.Namespace = cluster.Namespace 92 } 93 94 if cluster.Spec.ControlPlaneRef != nil && cluster.Spec.ControlPlaneRef.Namespace == "" { 95 cluster.Spec.ControlPlaneRef.Namespace = cluster.Namespace 96 } 97 98 // Additional defaulting if the Cluster uses a managed topology. 99 if cluster.Spec.Topology != nil { 100 // Tolerate version strings without a "v" prefix: prepend it if it's not there. 101 if !strings.HasPrefix(cluster.Spec.Topology.Version, "v") { 102 cluster.Spec.Topology.Version = "v" + cluster.Spec.Topology.Version 103 } 104 clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster) 105 if err != nil { 106 // If the ClusterClass can't be found or is not up to date ignore the error. 107 if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) { 108 return nil 109 } 110 return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class)) 111 } 112 113 // Doing both defaulting and validating here prevents a race condition where the ClusterClass could be 114 // different in the defaulting and validating webhook. 115 allErrs = append(allErrs, DefaultAndValidateVariables(cluster, clusterClass)...) 116 117 if len(allErrs) > 0 { 118 return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), cluster.Name, allErrs) 119 } 120 } 121 return nil 122 } 123 124 // ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type. 125 func (webhook *Cluster) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { 126 cluster, ok := obj.(*clusterv1.Cluster) 127 if !ok { 128 return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", obj)) 129 } 130 return webhook.validate(ctx, nil, cluster) 131 } 132 133 // ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type. 134 func (webhook *Cluster) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { 135 newCluster, ok := newObj.(*clusterv1.Cluster) 136 if !ok { 137 return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", newObj)) 138 } 139 oldCluster, ok := oldObj.(*clusterv1.Cluster) 140 if !ok { 141 return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", oldObj)) 142 } 143 return webhook.validate(ctx, oldCluster, newCluster) 144 } 145 146 // ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type. 147 func (webhook *Cluster) ValidateDelete(_ context.Context, _ runtime.Object) (admission.Warnings, error) { 148 return nil, nil 149 } 150 151 func (webhook *Cluster) validate(ctx context.Context, oldCluster, newCluster *clusterv1.Cluster) (admission.Warnings, error) { 152 var allErrs field.ErrorList 153 var allWarnings admission.Warnings 154 // The Cluster name is used as a label value. This check ensures that names which are not valid label values are rejected. 155 if errs := validation.IsValidLabelValue(newCluster.Name); len(errs) != 0 { 156 for _, err := range errs { 157 allErrs = append( 158 allErrs, 159 field.Invalid( 160 field.NewPath("metadata", "name"), 161 newCluster.Name, 162 fmt.Sprintf("must be a valid label value %s", err), 163 ), 164 ) 165 } 166 } 167 specPath := field.NewPath("spec") 168 if newCluster.Spec.InfrastructureRef != nil && newCluster.Spec.InfrastructureRef.Namespace != newCluster.Namespace { 169 allErrs = append( 170 allErrs, 171 field.Invalid( 172 specPath.Child("infrastructureRef", "namespace"), 173 newCluster.Spec.InfrastructureRef.Namespace, 174 "must match metadata.namespace", 175 ), 176 ) 177 } 178 179 if newCluster.Spec.ControlPlaneRef != nil && newCluster.Spec.ControlPlaneRef.Namespace != newCluster.Namespace { 180 allErrs = append( 181 allErrs, 182 field.Invalid( 183 specPath.Child("controlPlaneRef", "namespace"), 184 newCluster.Spec.ControlPlaneRef.Namespace, 185 "must match metadata.namespace", 186 ), 187 ) 188 } 189 if newCluster.Spec.ClusterNetwork != nil { 190 // Ensure that the CIDR blocks defined under ClusterNetwork are valid. 191 if newCluster.Spec.ClusterNetwork.Pods != nil { 192 allErrs = append(allErrs, validateCIDRBlocks(specPath.Child("clusterNetwork", "pods", "cidrBlocks"), 193 newCluster.Spec.ClusterNetwork.Pods.CIDRBlocks)...) 194 } 195 196 if newCluster.Spec.ClusterNetwork.Services != nil { 197 allErrs = append(allErrs, validateCIDRBlocks(specPath.Child("clusterNetwork", "services", "cidrBlocks"), 198 newCluster.Spec.ClusterNetwork.Services.CIDRBlocks)...) 199 } 200 } 201 202 topologyPath := specPath.Child("topology") 203 204 // Validate the managed topology, if defined. 205 if newCluster.Spec.Topology != nil { 206 topologyWarnings, topologyErrs := webhook.validateTopology(ctx, oldCluster, newCluster, topologyPath) 207 allWarnings = append(allWarnings, topologyWarnings...) 208 allErrs = append(allErrs, topologyErrs...) 209 } 210 211 // On update. 212 if oldCluster != nil { 213 // Error if the update moves the cluster from Managed to Unmanaged i.e. the managed topology is removed on update. 214 if oldCluster.Spec.Topology != nil && newCluster.Spec.Topology == nil { 215 allErrs = append(allErrs, field.Forbidden( 216 topologyPath, 217 "cannot be removed from an existing Cluster", 218 )) 219 } 220 } 221 222 if len(allErrs) > 0 { 223 return allWarnings, apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), newCluster.Name, allErrs) 224 } 225 return allWarnings, nil 226 } 227 228 func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newCluster *clusterv1.Cluster, fldPath *field.Path) (admission.Warnings, field.ErrorList) { 229 var allWarnings admission.Warnings 230 231 // NOTE: ClusterClass and managed topologies are behind ClusterTopology feature gate flag; the web hook 232 // must prevent the usage of Cluster.Topology in case the feature flag is disabled. 233 if !feature.Gates.Enabled(feature.ClusterTopology) { 234 return allWarnings, field.ErrorList{ 235 field.Forbidden( 236 fldPath, 237 "can be set only if the ClusterTopology feature flag is enabled", 238 ), 239 } 240 } 241 242 var allErrs field.ErrorList 243 244 // class should be defined. 245 if newCluster.Spec.Topology.Class == "" { 246 allErrs = append( 247 allErrs, 248 field.Required( 249 fldPath.Child("class"), 250 "class cannot be empty", 251 ), 252 ) 253 // Return early if there is no defined class to validate. 254 return allWarnings, allErrs 255 } 256 257 // version should be valid. 258 if !version.KubeSemver.MatchString(newCluster.Spec.Topology.Version) { 259 allErrs = append( 260 allErrs, 261 field.Invalid( 262 fldPath.Child("version"), 263 newCluster.Spec.Topology.Version, 264 "version must be a valid semantic version", 265 ), 266 ) 267 } 268 269 // metadata in topology should be valid 270 allErrs = append(allErrs, validateTopologyMetadata(newCluster.Spec.Topology, fldPath)...) 271 272 // upgrade concurrency should be a numeric value. 273 if concurrency, ok := newCluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation]; ok { 274 concurrencyAnnotationField := field.NewPath("metadata", "annotations", clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation) 275 concurrencyInt, err := strconv.Atoi(concurrency) 276 if err != nil { 277 allErrs = append(allErrs, field.Invalid( 278 concurrencyAnnotationField, 279 concurrency, 280 errors.Wrap(err, "could not parse the value of the annotation").Error(), 281 )) 282 } else if concurrencyInt < 1 { 283 allErrs = append(allErrs, field.Invalid( 284 concurrencyAnnotationField, 285 concurrency, 286 "value cannot be less than 1", 287 )) 288 } 289 } 290 291 // Get the ClusterClass referenced in the Cluster. 292 clusterClass, warnings, clusterClassPollErr := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster) 293 // If the error is anything other than "NotFound" or "NotReconciled" return all errors. 294 if clusterClassPollErr != nil && !(apierrors.IsNotFound(clusterClassPollErr) || errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) { 295 allErrs = append( 296 allErrs, field.InternalError( 297 fldPath.Child("class"), 298 clusterClassPollErr)) 299 return allWarnings, allErrs 300 } 301 302 // Add the warnings if no error was returned. 303 allWarnings = append(allWarnings, warnings...) 304 305 // If there's no error validate the Cluster based on the ClusterClass. 306 if clusterClassPollErr == nil { 307 allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass)...) 308 } 309 if oldCluster != nil { // On update 310 // The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was 311 // not found. 312 if apierrors.IsNotFound(clusterClassPollErr) { 313 allErrs = append( 314 allErrs, field.InternalError( 315 fldPath.Child("class"), 316 clusterClassPollErr)) 317 return allWarnings, allErrs 318 } 319 320 // Topology or Class can not be added on update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set. 321 if oldCluster.Spec.Topology == nil || oldCluster.Spec.Topology.Class == "" { 322 if _, ok := newCluster.Annotations[clusterv1.ClusterTopologyUnsafeUpdateClassNameAnnotation]; ok { 323 return allWarnings, allErrs 324 } 325 326 allErrs = append( 327 allErrs, 328 field.Forbidden( 329 fldPath.Child("class"), 330 "class cannot be set on an existing Cluster", 331 ), 332 ) 333 // return early here if there is no class to compare. 334 return allWarnings, allErrs 335 } 336 337 inVersion, err := semver.ParseTolerant(newCluster.Spec.Topology.Version) 338 if err != nil { 339 allErrs = append( 340 allErrs, 341 field.Invalid( 342 fldPath.Child("version"), 343 newCluster.Spec.Topology.Version, 344 "version must be a valid semantic version", 345 ), 346 ) 347 } 348 oldVersion, err := semver.ParseTolerant(oldCluster.Spec.Topology.Version) 349 if err != nil { 350 // NOTE: this should never happen. Nevertheless, handling this for extra caution. 351 allErrs = append( 352 allErrs, 353 field.Invalid( 354 fldPath.Child("version"), 355 oldCluster.Spec.Topology.Version, 356 "old version must be a valid semantic version", 357 ), 358 ) 359 } 360 361 if _, ok := newCluster.GetAnnotations()[clusterv1.ClusterTopologyUnsafeUpdateVersionAnnotation]; ok { 362 log := ctrl.LoggerFrom(ctx) 363 warningMsg := fmt.Sprintf("Skipping version validation for Cluster because annotation %q is set.", clusterv1.ClusterTopologyUnsafeUpdateVersionAnnotation) 364 log.Info(warningMsg) 365 allWarnings = append(allWarnings, warningMsg) 366 } else { 367 if err := webhook.validateTopologyVersion(ctx, fldPath.Child("version"), newCluster.Spec.Topology.Version, inVersion, oldVersion, oldCluster); err != nil { 368 allErrs = append(allErrs, err) 369 } 370 } 371 372 // If the ClusterClass referenced in the Topology has changed compatibility checks are needed. 373 if oldCluster.Spec.Topology.Class != newCluster.Spec.Topology.Class { 374 // Check to see if the ClusterClass referenced in the old version of the Cluster exists. 375 oldClusterClass, err := webhook.pollClusterClassForCluster(ctx, oldCluster) 376 if err != nil { 377 allErrs = append( 378 allErrs, field.Forbidden( 379 fldPath.Child("class"), 380 fmt.Sprintf("valid ClusterClass with name %q could not be retrieved, change from class %[1]q to class %q cannot be validated. Error: %s", 381 oldCluster.Spec.Topology.Class, newCluster.Spec.Topology.Class, err.Error()))) 382 383 // Return early with errors if the ClusterClass can't be retrieved. 384 return allWarnings, allErrs 385 } 386 387 // Check if the new and old ClusterClasses are compatible with one another. 388 allErrs = append(allErrs, check.ClusterClassesAreCompatible(oldClusterClass, clusterClass)...) 389 } 390 } 391 return allWarnings, allErrs 392 } 393 394 func (webhook *Cluster) validateTopologyVersion(ctx context.Context, fldPath *field.Path, fldValue string, inVersion, oldVersion semver.Version, oldCluster *clusterv1.Cluster) *field.Error { 395 // Version could only be increased. 396 if inVersion.NE(semver.Version{}) && oldVersion.NE(semver.Version{}) && version.Compare(inVersion, oldVersion, version.WithBuildTags()) == -1 { 397 return field.Invalid( 398 fldPath, 399 fldValue, 400 fmt.Sprintf("version cannot be decreased from %q to %q", oldVersion, inVersion), 401 ) 402 } 403 404 // A +2 minor version upgrade is not allowed. 405 ceilVersion := semver.Version{ 406 Major: oldVersion.Major, 407 Minor: oldVersion.Minor + 2, 408 Patch: 0, 409 } 410 if inVersion.GTE(ceilVersion) { 411 return field.Invalid( 412 fldPath, 413 fldValue, 414 fmt.Sprintf("version cannot be increased from %q to %q", oldVersion, inVersion), 415 ) 416 } 417 418 // Only check the following cases if the minor version increases by 1 (we already return above for >= 2). 419 ceilVersion = semver.Version{ 420 Major: oldVersion.Major, 421 Minor: oldVersion.Minor + 1, 422 Patch: 0, 423 } 424 425 // Return early if its not a minor version upgrade. 426 if !inVersion.GTE(ceilVersion) { 427 return nil 428 } 429 430 allErrs := []error{} 431 // minor version cannot be increased if control plane is upgrading or not yet on the current version 432 if err := validateTopologyControlPlaneVersion(ctx, webhook.Client, oldCluster, oldVersion); err != nil { 433 allErrs = append(allErrs, fmt.Errorf("blocking version update due to ControlPlane version check: %v", err)) 434 } 435 436 // minor version cannot be increased if MachineDeployments are upgrading or not yet on the current version 437 if err := validateTopologyMachineDeploymentVersions(ctx, webhook.Client, oldCluster, oldVersion); err != nil { 438 allErrs = append(allErrs, fmt.Errorf("blocking version update due to MachineDeployment version check: %v", err)) 439 } 440 441 // minor version cannot be increased if MachinePools are upgrading or not yet on the current version 442 if err := validateTopologyMachinePoolVersions(ctx, webhook.Client, webhook.Tracker, oldCluster, oldVersion); err != nil { 443 allErrs = append(allErrs, fmt.Errorf("blocking version update due to MachinePool version check: %v", err)) 444 } 445 446 if len(allErrs) > 0 { 447 return field.Invalid( 448 fldPath, 449 fldValue, 450 fmt.Sprintf("minor version update cannot happen at this time: %v", kerrors.NewAggregate(allErrs)), 451 ) 452 } 453 454 return nil 455 } 456 457 func validateTopologyControlPlaneVersion(ctx context.Context, ctrlClient client.Reader, oldCluster *clusterv1.Cluster, oldVersion semver.Version) error { 458 cp, err := external.Get(ctx, ctrlClient, oldCluster.Spec.ControlPlaneRef, oldCluster.Namespace) 459 if err != nil { 460 return errors.Wrap(err, "failed to get ControlPlane object") 461 } 462 463 cpVersionString, err := contract.ControlPlane().Version().Get(cp) 464 if err != nil { 465 return errors.Wrap(err, "failed to get ControlPlane version") 466 } 467 468 cpVersion, err := semver.ParseTolerant(*cpVersionString) 469 if err != nil { 470 // NOTE: this should never happen. Nevertheless, handling this for extra caution. 471 return errors.New("failed to parse version of ControlPlane") 472 } 473 if cpVersion.NE(oldVersion) { 474 return fmt.Errorf("ControlPlane version %q does not match the current version %q", cpVersion, oldVersion) 475 } 476 477 provisioning, err := contract.ControlPlane().IsProvisioning(cp) 478 if err != nil { 479 return errors.Wrap(err, "failed to check if ControlPlane is provisioning") 480 } 481 482 if provisioning { 483 return errors.New("ControlPlane is currently provisioning") 484 } 485 486 upgrading, err := contract.ControlPlane().IsUpgrading(cp) 487 if err != nil { 488 return errors.Wrap(err, "failed to check if ControlPlane is upgrading") 489 } 490 491 if upgrading { 492 return errors.New("ControlPlane is still completing a previous upgrade") 493 } 494 495 return nil 496 } 497 498 func validateTopologyMachineDeploymentVersions(ctx context.Context, ctrlClient client.Reader, oldCluster *clusterv1.Cluster, oldVersion semver.Version) error { 499 // List all the machine deployments in the current cluster and in a managed topology. 500 // FROM: current_state.go getCurrentMachineDeploymentState 501 mds := &clusterv1.MachineDeploymentList{} 502 err := ctrlClient.List(ctx, mds, 503 client.MatchingLabels{ 504 clusterv1.ClusterNameLabel: oldCluster.Name, 505 clusterv1.ClusterTopologyOwnedLabel: "", 506 }, 507 client.InNamespace(oldCluster.Namespace), 508 ) 509 if err != nil { 510 return errors.Wrap(err, "failed to read MachineDeployments for managed topology") 511 } 512 513 if len(mds.Items) == 0 { 514 return nil 515 } 516 517 mdUpgradingNames := []string{} 518 519 for i := range mds.Items { 520 md := &mds.Items[i] 521 522 mdVersion, err := semver.ParseTolerant(*md.Spec.Template.Spec.Version) 523 if err != nil { 524 // NOTE: this should never happen. Nevertheless, handling this for extra caution. 525 return errors.Wrapf(err, "failed to parse MachineDeployment's %q version %q", klog.KObj(md), *md.Spec.Template.Spec.Version) 526 } 527 528 if mdVersion.NE(oldVersion) { 529 mdUpgradingNames = append(mdUpgradingNames, md.Name) 530 continue 531 } 532 533 upgrading, err := check.IsMachineDeploymentUpgrading(ctx, ctrlClient, md) 534 if err != nil { 535 return errors.Wrap(err, "failed to check if MachineDeployment is upgrading") 536 } 537 if upgrading { 538 mdUpgradingNames = append(mdUpgradingNames, md.Name) 539 } 540 } 541 542 if len(mdUpgradingNames) > 0 { 543 return fmt.Errorf("there are MachineDeployments still completing a previous upgrade: [%s]", strings.Join(mdUpgradingNames, ", ")) 544 } 545 546 return nil 547 } 548 549 func validateTopologyMachinePoolVersions(ctx context.Context, ctrlClient client.Reader, tracker ClusterCacheTrackerReader, oldCluster *clusterv1.Cluster, oldVersion semver.Version) error { 550 // List all the machine pools in the current cluster and in a managed topology. 551 // FROM: current_state.go getCurrentMachinePoolState 552 mps := &expv1.MachinePoolList{} 553 err := ctrlClient.List(ctx, mps, 554 client.MatchingLabels{ 555 clusterv1.ClusterNameLabel: oldCluster.Name, 556 clusterv1.ClusterTopologyOwnedLabel: "", 557 }, 558 client.InNamespace(oldCluster.Namespace), 559 ) 560 if err != nil { 561 return errors.Wrap(err, "failed to read MachinePools for managed topology") 562 } 563 564 // Return early 565 if len(mps.Items) == 0 { 566 return nil 567 } 568 569 wlClient, err := tracker.GetReader(ctx, client.ObjectKeyFromObject(oldCluster)) 570 if err != nil { 571 return errors.Wrap(err, "unable to get client for workload cluster") 572 } 573 574 mpUpgradingNames := []string{} 575 576 for i := range mps.Items { 577 mp := &mps.Items[i] 578 579 mpVersion, err := semver.ParseTolerant(*mp.Spec.Template.Spec.Version) 580 if err != nil { 581 // NOTE: this should never happen. Nevertheless, handling this for extra caution. 582 return errors.Wrapf(err, "failed to parse MachinePool's %q version %q", klog.KObj(mp), *mp.Spec.Template.Spec.Version) 583 } 584 585 if mpVersion.NE(oldVersion) { 586 mpUpgradingNames = append(mpUpgradingNames, mp.Name) 587 continue 588 } 589 590 upgrading, err := check.IsMachinePoolUpgrading(ctx, wlClient, mp) 591 if err != nil { 592 return errors.Wrap(err, "failed to check if MachinePool is upgrading") 593 } 594 if upgrading { 595 mpUpgradingNames = append(mpUpgradingNames, mp.Name) 596 } 597 } 598 599 if len(mpUpgradingNames) > 0 { 600 return fmt.Errorf("there are MachinePools still completing a previous upgrade: [%s]", strings.Join(mpUpgradingNames, ", ")) 601 } 602 603 return nil 604 } 605 606 func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { 607 var allErrs field.ErrorList 608 609 if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil { 610 fldPath := field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck") 611 612 // Validate ControlPlane MachineHealthCheck if defined. 613 if !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() { 614 // Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure. 615 if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil { 616 allErrs = append(allErrs, field.Forbidden( 617 fldPath, 618 "can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass", 619 )) 620 } 621 allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace, 622 &cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass)...) 623 } 624 625 // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is 626 // available either in the Cluster topology or in the ClusterClass. 627 // (One of these definitions will be used in the controller to create the MachineHealthCheck) 628 629 // Check if the machineHealthCheck is explicitly enabled in the ControlPlaneTopology. 630 if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil && *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable { 631 // Ensure the MHC is defined in at least one of the ControlPlaneTopology of the Cluster or the ControlPlaneClass of the ClusterClass. 632 if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() && clusterClass.Spec.ControlPlane.MachineHealthCheck == nil { 633 allErrs = append(allErrs, field.Forbidden( 634 fldPath.Child("enable"), 635 fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable), 636 )) 637 } 638 } 639 } 640 641 if cluster.Spec.Topology.Workers != nil { 642 for i := range cluster.Spec.Topology.Workers.MachineDeployments { 643 md := cluster.Spec.Topology.Workers.MachineDeployments[i] 644 if md.MachineHealthCheck != nil { 645 fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i) 646 647 // Validate the MachineDeployment MachineHealthCheck if defined. 648 if !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() { 649 allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace, 650 &md.MachineHealthCheck.MachineHealthCheckClass)...) 651 } 652 653 // If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is 654 // available either in the Cluster topology or in the ClusterClass. 655 // (One of these definitions will be used in the controller to create the MachineHealthCheck) 656 mdClass := machineDeploymentClassOfName(clusterClass, md.Class) 657 if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations. 658 // Check if the machineHealthCheck is explicitly enabled in the machineDeploymentTopology. 659 if md.MachineHealthCheck.Enable != nil && *md.MachineHealthCheck.Enable { 660 // Ensure the MHC is defined in at least one of the MachineDeploymentTopology of the Cluster or the MachineDeploymentClass of the ClusterClass. 661 if md.MachineHealthCheck.MachineHealthCheckClass.IsZero() && mdClass.MachineHealthCheck == nil { 662 allErrs = append(allErrs, field.Forbidden( 663 fldPath.Child("enable"), 664 fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable), 665 )) 666 } 667 } 668 } 669 } 670 } 671 } 672 673 return allErrs 674 } 675 676 // machineDeploymentClassOfName find a MachineDeploymentClass of the given name in the provided ClusterClass. 677 // Returns nil if it can not find one. 678 // TODO: Check if there is already a helper function that can do this. 679 func machineDeploymentClassOfName(clusterClass *clusterv1.ClusterClass, name string) *clusterv1.MachineDeploymentClass { 680 for _, mdClass := range clusterClass.Spec.Workers.MachineDeployments { 681 if mdClass.Class == name { 682 return &mdClass 683 } 684 } 685 return nil 686 } 687 688 // validateCIDRBlocks ensures the passed CIDR is valid. 689 func validateCIDRBlocks(fldPath *field.Path, cidrs []string) field.ErrorList { 690 var allErrs field.ErrorList 691 for i, cidr := range cidrs { 692 if _, _, err := net.ParseCIDR(cidr); err != nil { 693 allErrs = append(allErrs, field.Invalid( 694 fldPath.Index(i), 695 cidr, 696 err.Error())) 697 } 698 } 699 return allErrs 700 } 701 702 // DefaultAndValidateVariables defaults and validates variables in the Cluster and MachineDeployment/MachinePool topologies based 703 // on the definitions in the ClusterClass. 704 func DefaultAndValidateVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { 705 var allErrs field.ErrorList 706 allErrs = append(allErrs, DefaultVariables(cluster, clusterClass)...) 707 708 // Variables must be validated in the defaulting webhook. Variable definitions are stored in the ClusterClass status 709 // and are patched in the ClusterClass reconcile. 710 allErrs = append(allErrs, variables.ValidateClusterVariables(cluster.Spec.Topology.Variables, clusterClass.Status.Variables, 711 field.NewPath("spec", "topology", "variables"))...) 712 if cluster.Spec.Topology.Workers != nil { 713 for i, md := range cluster.Spec.Topology.Workers.MachineDeployments { 714 // Continue if there are no variable overrides. 715 if md.Variables == nil || len(md.Variables.Overrides) == 0 { 716 continue 717 } 718 allErrs = append(allErrs, variables.ValidateMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables, 719 field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides"))...) 720 } 721 for i, mp := range cluster.Spec.Topology.Workers.MachinePools { 722 // Continue if there are no variable overrides. 723 if mp.Variables == nil || len(mp.Variables.Overrides) == 0 { 724 continue 725 } 726 allErrs = append(allErrs, variables.ValidateMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables, 727 field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides"))...) 728 } 729 } 730 return allErrs 731 } 732 733 // DefaultVariables defaults variables in the Cluster based on information in the ClusterClass. 734 func DefaultVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { 735 var allErrs field.ErrorList 736 if cluster == nil { 737 return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("Cluster can not be nil"))} 738 } 739 if clusterClass == nil { 740 return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("ClusterClass can not be nil"))} 741 } 742 defaultedVariables, errs := variables.DefaultClusterVariables(cluster.Spec.Topology.Variables, clusterClass.Status.Variables, 743 field.NewPath("spec", "topology", "variables")) 744 if len(errs) > 0 { 745 allErrs = append(allErrs, errs...) 746 } else { 747 cluster.Spec.Topology.Variables = defaultedVariables 748 } 749 750 if cluster.Spec.Topology.Workers != nil { 751 for i, md := range cluster.Spec.Topology.Workers.MachineDeployments { 752 // Continue if there are no variable overrides. 753 if md.Variables == nil || len(md.Variables.Overrides) == 0 { 754 continue 755 } 756 defaultedVariables, errs := variables.DefaultMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables, 757 field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides")) 758 if len(errs) > 0 { 759 allErrs = append(allErrs, errs...) 760 } else { 761 md.Variables.Overrides = defaultedVariables 762 } 763 } 764 for i, mp := range cluster.Spec.Topology.Workers.MachinePools { 765 // Continue if there are no variable overrides. 766 if mp.Variables == nil || len(mp.Variables.Overrides) == 0 { 767 continue 768 } 769 defaultedVariables, errs := variables.DefaultMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables, 770 field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides")) 771 if len(errs) > 0 { 772 allErrs = append(allErrs, errs...) 773 } else { 774 mp.Variables.Overrides = defaultedVariables 775 } 776 } 777 } 778 return allErrs 779 } 780 781 // ValidateClusterForClusterClass uses information in the ClusterClass to validate the Cluster. 782 func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList { 783 var allErrs field.ErrorList 784 if cluster == nil { 785 return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("Cluster can not be nil"))} 786 } 787 if clusterClass == nil { 788 return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("ClusterClass can not be nil"))} 789 } 790 allErrs = append(allErrs, check.MachineDeploymentTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...) 791 792 allErrs = append(allErrs, check.MachinePoolTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...) 793 794 // Validate the MachineHealthChecks defined in the cluster topology. 795 allErrs = append(allErrs, validateMachineHealthChecks(cluster, clusterClass)...) 796 return allErrs 797 } 798 799 // validateClusterClassExistsAndIsReconciled will try to get the ClusterClass referenced in the Cluster. If it does not exist or is not reconciled it will add a warning. 800 // In any other case it will return an error. 801 func (webhook *Cluster) validateClusterClassExistsAndIsReconciled(ctx context.Context, newCluster *clusterv1.Cluster) (*clusterv1.ClusterClass, admission.Warnings, error) { 802 var allWarnings admission.Warnings 803 clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster) 804 if clusterClassPollErr != nil { 805 // Add a warning if the Class does not exist or if it has not been successfully reconciled. 806 switch { 807 case apierrors.IsNotFound(clusterClassPollErr): 808 allWarnings = append(allWarnings, 809 fmt.Sprintf( 810 "Cluster refers to ClusterClass %s in the topology but it does not exist. "+ 811 "Cluster topology has not been fully validated. "+ 812 "The ClusterClass must be created to reconcile the Cluster", newCluster.Spec.Topology.Class), 813 ) 814 case errors.Is(clusterClassPollErr, errClusterClassNotReconciled): 815 allWarnings = append(allWarnings, 816 fmt.Sprintf( 817 "Cluster refers to ClusterClass %s but this object which hasn't yet been reconciled. "+ 818 "Cluster topology has not been fully validated. ", newCluster.Spec.Topology.Class), 819 ) 820 // If there's any other error return a generic warning with the error message. 821 default: 822 allWarnings = append(allWarnings, 823 fmt.Sprintf( 824 "Cluster refers to ClusterClass %s in the topology but it could not be retrieved. "+ 825 "Cluster topology has not been fully validated: %s", newCluster.Spec.Topology.Class, clusterClassPollErr.Error()), 826 ) 827 } 828 } 829 return clusterClass, allWarnings, clusterClassPollErr 830 } 831 832 // pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds. 833 func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) { 834 clusterClass := &clusterv1.ClusterClass{} 835 var clusterClassPollErr error 836 _ = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) { 837 if clusterClassPollErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassPollErr != nil { 838 return false, nil //nolint:nilerr 839 } 840 841 if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil { 842 return false, nil //nolint:nilerr 843 } 844 clusterClassPollErr = nil 845 return true, nil 846 }) 847 if clusterClassPollErr != nil { 848 return nil, clusterClassPollErr 849 } 850 return clusterClass, nil 851 } 852 853 // clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the 854 // ClusterClass variables have not been successfully reconciled. 855 func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error { 856 // If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled. 857 if clusterClass.Generation != clusterClass.Status.ObservedGeneration { 858 return errClusterClassNotReconciled 859 } 860 // If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled. 861 if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) || 862 conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) { 863 return errClusterClassNotReconciled 864 } 865 return nil 866 } 867 868 func validateTopologyMetadata(topology *clusterv1.Topology, fldPath *field.Path) field.ErrorList { 869 var allErrs field.ErrorList 870 allErrs = append(allErrs, topology.ControlPlane.Metadata.Validate(fldPath.Child("controlPlane", "metadata"))...) 871 if topology.Workers != nil { 872 for idx, md := range topology.Workers.MachineDeployments { 873 allErrs = append(allErrs, md.Metadata.Validate( 874 fldPath.Child("workers", "machineDeployments").Index(idx).Child("metadata"), 875 )...) 876 } 877 for idx, mp := range topology.Workers.MachinePools { 878 allErrs = append(allErrs, mp.Metadata.Validate( 879 fldPath.Child("workers", "machinePools").Index(idx).Child("metadata"), 880 )...) 881 } 882 } 883 return allErrs 884 }