sigs.k8s.io/cluster-api@v1.6.3/internal/webhooks/cluster.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package webhooks
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net"
    23  	"strconv"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/blang/semver/v4"
    28  	"github.com/pkg/errors"
    29  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    30  	"k8s.io/apimachinery/pkg/runtime"
    31  	"k8s.io/apimachinery/pkg/util/validation"
    32  	"k8s.io/apimachinery/pkg/util/validation/field"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  	ctrl "sigs.k8s.io/controller-runtime"
    35  	"sigs.k8s.io/controller-runtime/pkg/client"
    36  	"sigs.k8s.io/controller-runtime/pkg/webhook"
    37  	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
    38  
    39  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    40  	"sigs.k8s.io/cluster-api/feature"
    41  	"sigs.k8s.io/cluster-api/internal/topology/check"
    42  	"sigs.k8s.io/cluster-api/internal/topology/variables"
    43  	"sigs.k8s.io/cluster-api/util/conditions"
    44  	"sigs.k8s.io/cluster-api/util/version"
    45  )
    46  
    47  // SetupWebhookWithManager sets up Cluster webhooks.
    48  func (webhook *Cluster) SetupWebhookWithManager(mgr ctrl.Manager) error {
    49  	return ctrl.NewWebhookManagedBy(mgr).
    50  		For(&clusterv1.Cluster{}).
    51  		WithDefaulter(webhook).
    52  		WithValidator(webhook).
    53  		Complete()
    54  }
    55  
    56  // +kubebuilder:webhook:verbs=create;update;delete,path=/validate-cluster-x-k8s-io-v1beta1-cluster,mutating=false,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=clusters,versions=v1beta1,name=validation.cluster.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1
    57  // +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-cluster,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=clusters,versions=v1beta1,name=default.cluster.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1
    58  
// Cluster implements a validating and defaulting webhook for Cluster.
type Cluster struct {
	// Client is the reader used to fetch the ClusterClass referenced by a Cluster's topology.
	Client client.Reader
}
    63  
    64  var _ webhook.CustomDefaulter = &Cluster{}
    65  var _ webhook.CustomValidator = &Cluster{}
    66  
    67  var errClusterClassNotReconciled = errors.New("ClusterClass is not up to date")
    68  
    69  // Default satisfies the defaulting webhook interface.
    70  func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
    71  	// We gather all defaulting errors and return them together.
    72  	var allErrs field.ErrorList
    73  
    74  	cluster, ok := obj.(*clusterv1.Cluster)
    75  	if !ok {
    76  		return apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", obj))
    77  	}
    78  
    79  	if cluster.Spec.InfrastructureRef != nil && cluster.Spec.InfrastructureRef.Namespace == "" {
    80  		cluster.Spec.InfrastructureRef.Namespace = cluster.Namespace
    81  	}
    82  
    83  	if cluster.Spec.ControlPlaneRef != nil && cluster.Spec.ControlPlaneRef.Namespace == "" {
    84  		cluster.Spec.ControlPlaneRef.Namespace = cluster.Namespace
    85  	}
    86  
    87  	// Additional defaulting if the Cluster uses a managed topology.
    88  	if cluster.Spec.Topology != nil {
    89  		// Tolerate version strings without a "v" prefix: prepend it if it's not there.
    90  		if !strings.HasPrefix(cluster.Spec.Topology.Version, "v") {
    91  			cluster.Spec.Topology.Version = "v" + cluster.Spec.Topology.Version
    92  		}
    93  		clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster)
    94  		if err != nil {
    95  			// If the ClusterClass can't be found or is not up to date ignore the error.
    96  			if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) {
    97  				return nil
    98  			}
    99  			return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class))
   100  		}
   101  
   102  		// Doing both defaulting and validating here prevents a race condition where the ClusterClass could be
   103  		// different in the defaulting and validating webhook.
   104  		allErrs = append(allErrs, DefaultAndValidateVariables(cluster, clusterClass)...)
   105  
   106  		if len(allErrs) > 0 {
   107  			return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), cluster.Name, allErrs)
   108  		}
   109  	}
   110  	return nil
   111  }
   112  
   113  // ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type.
   114  func (webhook *Cluster) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
   115  	cluster, ok := obj.(*clusterv1.Cluster)
   116  	if !ok {
   117  		return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", obj))
   118  	}
   119  	return webhook.validate(ctx, nil, cluster)
   120  }
   121  
   122  // ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type.
   123  func (webhook *Cluster) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
   124  	newCluster, ok := newObj.(*clusterv1.Cluster)
   125  	if !ok {
   126  		return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", newObj))
   127  	}
   128  	oldCluster, ok := oldObj.(*clusterv1.Cluster)
   129  	if !ok {
   130  		return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", oldObj))
   131  	}
   132  	return webhook.validate(ctx, oldCluster, newCluster)
   133  }
   134  
   135  // ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type.
   136  func (webhook *Cluster) ValidateDelete(_ context.Context, _ runtime.Object) (admission.Warnings, error) {
   137  	return nil, nil
   138  }
   139  
   140  func (webhook *Cluster) validate(ctx context.Context, oldCluster, newCluster *clusterv1.Cluster) (admission.Warnings, error) {
   141  	var allErrs field.ErrorList
   142  	var allWarnings admission.Warnings
   143  	// The Cluster name is used as a label value. This check ensures that names which are not valid label values are rejected.
   144  	if errs := validation.IsValidLabelValue(newCluster.Name); len(errs) != 0 {
   145  		for _, err := range errs {
   146  			allErrs = append(
   147  				allErrs,
   148  				field.Invalid(
   149  					field.NewPath("metadata", "name"),
   150  					newCluster.Name,
   151  					fmt.Sprintf("must be a valid label value %s", err),
   152  				),
   153  			)
   154  		}
   155  	}
   156  	specPath := field.NewPath("spec")
   157  	if newCluster.Spec.InfrastructureRef != nil && newCluster.Spec.InfrastructureRef.Namespace != newCluster.Namespace {
   158  		allErrs = append(
   159  			allErrs,
   160  			field.Invalid(
   161  				specPath.Child("infrastructureRef", "namespace"),
   162  				newCluster.Spec.InfrastructureRef.Namespace,
   163  				"must match metadata.namespace",
   164  			),
   165  		)
   166  	}
   167  
   168  	if newCluster.Spec.ControlPlaneRef != nil && newCluster.Spec.ControlPlaneRef.Namespace != newCluster.Namespace {
   169  		allErrs = append(
   170  			allErrs,
   171  			field.Invalid(
   172  				specPath.Child("controlPlaneRef", "namespace"),
   173  				newCluster.Spec.ControlPlaneRef.Namespace,
   174  				"must match metadata.namespace",
   175  			),
   176  		)
   177  	}
   178  	if newCluster.Spec.ClusterNetwork != nil {
   179  		// Ensure that the CIDR blocks defined under ClusterNetwork are valid.
   180  		if newCluster.Spec.ClusterNetwork.Pods != nil {
   181  			allErrs = append(allErrs, validateCIDRBlocks(specPath.Child("clusterNetwork", "pods", "cidrBlocks"),
   182  				newCluster.Spec.ClusterNetwork.Pods.CIDRBlocks)...)
   183  		}
   184  
   185  		if newCluster.Spec.ClusterNetwork.Services != nil {
   186  			allErrs = append(allErrs, validateCIDRBlocks(specPath.Child("clusterNetwork", "services", "cidrBlocks"),
   187  				newCluster.Spec.ClusterNetwork.Services.CIDRBlocks)...)
   188  		}
   189  	}
   190  
   191  	topologyPath := specPath.Child("topology")
   192  
   193  	// Validate the managed topology, if defined.
   194  	if newCluster.Spec.Topology != nil {
   195  		topologyWarnings, topologyErrs := webhook.validateTopology(ctx, oldCluster, newCluster, topologyPath)
   196  		allWarnings = append(allWarnings, topologyWarnings...)
   197  		allErrs = append(allErrs, topologyErrs...)
   198  	}
   199  
   200  	// On update.
   201  	if oldCluster != nil {
   202  		// Error if the update moves the cluster from Managed to Unmanaged i.e. the managed topology is removed on update.
   203  		if oldCluster.Spec.Topology != nil && newCluster.Spec.Topology == nil {
   204  			allErrs = append(allErrs, field.Forbidden(
   205  				topologyPath,
   206  				"cannot be removed from an existing Cluster",
   207  			))
   208  		}
   209  	}
   210  
   211  	if len(allErrs) > 0 {
   212  		return allWarnings, apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), newCluster.Name, allErrs)
   213  	}
   214  	return allWarnings, nil
   215  }
   216  
   217  func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newCluster *clusterv1.Cluster, fldPath *field.Path) (admission.Warnings, field.ErrorList) {
   218  	var allWarnings admission.Warnings
   219  
   220  	// NOTE: ClusterClass and managed topologies are behind ClusterTopology feature gate flag; the web hook
   221  	// must prevent the usage of Cluster.Topology in case the feature flag is disabled.
   222  	if !feature.Gates.Enabled(feature.ClusterTopology) {
   223  		return allWarnings, field.ErrorList{
   224  			field.Forbidden(
   225  				fldPath,
   226  				"can be set only if the ClusterTopology feature flag is enabled",
   227  			),
   228  		}
   229  	}
   230  
   231  	var allErrs field.ErrorList
   232  
   233  	// class should be defined.
   234  	if newCluster.Spec.Topology.Class == "" {
   235  		allErrs = append(
   236  			allErrs,
   237  			field.Required(
   238  				fldPath.Child("class"),
   239  				"class cannot be empty",
   240  			),
   241  		)
   242  		// Return early if there is no defined class to validate.
   243  		return allWarnings, allErrs
   244  	}
   245  
   246  	// version should be valid.
   247  	if !version.KubeSemver.MatchString(newCluster.Spec.Topology.Version) {
   248  		allErrs = append(
   249  			allErrs,
   250  			field.Invalid(
   251  				fldPath.Child("version"),
   252  				newCluster.Spec.Topology.Version,
   253  				"version must be a valid semantic version",
   254  			),
   255  		)
   256  	}
   257  
   258  	// metadata in topology should be valid
   259  	allErrs = append(allErrs, validateTopologyMetadata(newCluster.Spec.Topology, fldPath)...)
   260  
   261  	// upgrade concurrency should be a numeric value.
   262  	if concurrency, ok := newCluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation]; ok {
   263  		concurrencyAnnotationField := field.NewPath("metadata", "annotations", clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation)
   264  		concurrencyInt, err := strconv.Atoi(concurrency)
   265  		if err != nil {
   266  			allErrs = append(allErrs, field.Invalid(
   267  				concurrencyAnnotationField,
   268  				concurrency,
   269  				errors.Wrap(err, "could not parse the value of the annotation").Error(),
   270  			))
   271  		} else if concurrencyInt < 1 {
   272  			allErrs = append(allErrs, field.Invalid(
   273  				concurrencyAnnotationField,
   274  				concurrency,
   275  				"value cannot be less than 1",
   276  			))
   277  		}
   278  	}
   279  
   280  	// Get the ClusterClass referenced in the Cluster.
   281  	clusterClass, warnings, clusterClassPollErr := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster)
   282  	// If the error is anything other than "NotFound" or "NotReconciled" return all errors.
   283  	if clusterClassPollErr != nil && !(apierrors.IsNotFound(clusterClassPollErr) || errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) {
   284  		allErrs = append(
   285  			allErrs, field.InternalError(
   286  				fldPath.Child("class"),
   287  				clusterClassPollErr))
   288  		return allWarnings, allErrs
   289  	}
   290  
   291  	// Add the warnings if no error was returned.
   292  	allWarnings = append(allWarnings, warnings...)
   293  
   294  	// If there's no error validate the Cluster based on the ClusterClass.
   295  	if clusterClassPollErr == nil {
   296  		allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass)...)
   297  	}
   298  	if oldCluster != nil { // On update
   299  		// The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was
   300  		// not found.
   301  		if apierrors.IsNotFound(clusterClassPollErr) {
   302  			allErrs = append(
   303  				allErrs, field.InternalError(
   304  					fldPath.Child("class"),
   305  					clusterClassPollErr))
   306  			return allWarnings, allErrs
   307  		}
   308  
   309  		// Topology or Class can not be added on update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
   310  		if oldCluster.Spec.Topology == nil || oldCluster.Spec.Topology.Class == "" {
   311  			if _, ok := newCluster.Annotations[clusterv1.ClusterTopologyUnsafeUpdateClassNameAnnotation]; ok {
   312  				return allWarnings, allErrs
   313  			}
   314  
   315  			allErrs = append(
   316  				allErrs,
   317  				field.Forbidden(
   318  					fldPath.Child("class"),
   319  					"class cannot be set on an existing Cluster",
   320  				),
   321  			)
   322  			// return early here if there is no class to compare.
   323  			return allWarnings, allErrs
   324  		}
   325  
   326  		// Version could only be increased.
   327  		inVersion, err := semver.ParseTolerant(newCluster.Spec.Topology.Version)
   328  		if err != nil {
   329  			allErrs = append(
   330  				allErrs,
   331  				field.Invalid(
   332  					fldPath.Child("version"),
   333  					newCluster.Spec.Topology.Version,
   334  					"version must be a valid semantic version",
   335  				),
   336  			)
   337  		}
   338  		oldVersion, err := semver.ParseTolerant(oldCluster.Spec.Topology.Version)
   339  		if err != nil {
   340  			// NOTE: this should never happen. Nevertheless, handling this for extra caution.
   341  			allErrs = append(
   342  				allErrs,
   343  				field.Invalid(
   344  					fldPath.Child("version"),
   345  					oldCluster.Spec.Topology.Version,
   346  					fmt.Sprintf("old version %q cannot be compared with %q", oldVersion, inVersion),
   347  				),
   348  			)
   349  		}
   350  		if inVersion.NE(semver.Version{}) && oldVersion.NE(semver.Version{}) && version.Compare(inVersion, oldVersion, version.WithBuildTags()) == -1 {
   351  			allErrs = append(
   352  				allErrs,
   353  				field.Invalid(
   354  					fldPath.Child("version"),
   355  					newCluster.Spec.Topology.Version,
   356  					fmt.Sprintf("version cannot be decreased from %q to %q", oldVersion, inVersion),
   357  				),
   358  			)
   359  		}
   360  		// A +2 minor version upgrade is not allowed.
   361  		ceilVersion := semver.Version{
   362  			Major: oldVersion.Major,
   363  			Minor: oldVersion.Minor + 2,
   364  			Patch: 0,
   365  		}
   366  		if inVersion.GTE(ceilVersion) {
   367  			allErrs = append(
   368  				allErrs,
   369  				field.Forbidden(
   370  					fldPath.Child("version"),
   371  					fmt.Sprintf("version cannot be increased from %q to %q", oldVersion, inVersion),
   372  				),
   373  			)
   374  		}
   375  
   376  		// If the ClusterClass referenced in the Topology has changed compatibility checks are needed.
   377  		if oldCluster.Spec.Topology.Class != newCluster.Spec.Topology.Class {
   378  			// Check to see if the ClusterClass referenced in the old version of the Cluster exists.
   379  			oldClusterClass, err := webhook.pollClusterClassForCluster(ctx, oldCluster)
   380  			if err != nil {
   381  				allErrs = append(
   382  					allErrs, field.Forbidden(
   383  						fldPath.Child("class"),
   384  						fmt.Sprintf("valid ClusterClass with name %q could not be retrieved, change from class %[1]q to class %q cannot be validated. Error: %s",
   385  							oldCluster.Spec.Topology.Class, newCluster.Spec.Topology.Class, err.Error())))
   386  
   387  				// Return early with errors if the ClusterClass can't be retrieved.
   388  				return allWarnings, allErrs
   389  			}
   390  
   391  			// Check if the new and old ClusterClasses are compatible with one another.
   392  			allErrs = append(allErrs, check.ClusterClassesAreCompatible(oldClusterClass, clusterClass)...)
   393  		}
   394  	}
   395  	return allWarnings, allErrs
   396  }
   397  
   398  func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList {
   399  	var allErrs field.ErrorList
   400  
   401  	if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil {
   402  		fldPath := field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck")
   403  
   404  		// Validate ControlPlane MachineHealthCheck if defined.
   405  		if !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
   406  			// Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure.
   407  			if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil {
   408  				allErrs = append(allErrs, field.Forbidden(
   409  					fldPath,
   410  					"can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass",
   411  				))
   412  			}
   413  			allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace,
   414  				&cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass)...)
   415  		}
   416  
   417  		// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
   418  		// available either in the Cluster topology or in the ClusterClass.
   419  		// (One of these definitions will be used in the controller to create the MachineHealthCheck)
   420  
   421  		// Check if the machineHealthCheck is explicitly enabled in the ControlPlaneTopology.
   422  		if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil && *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable {
   423  			// Ensure the MHC is defined in at least one of the ControlPlaneTopology of the Cluster or the ControlPlaneClass of the ClusterClass.
   424  			if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() && clusterClass.Spec.ControlPlane.MachineHealthCheck == nil {
   425  				allErrs = append(allErrs, field.Forbidden(
   426  					fldPath.Child("enable"),
   427  					fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable),
   428  				))
   429  			}
   430  		}
   431  	}
   432  
   433  	if cluster.Spec.Topology.Workers != nil {
   434  		for i := range cluster.Spec.Topology.Workers.MachineDeployments {
   435  			md := cluster.Spec.Topology.Workers.MachineDeployments[i]
   436  			if md.MachineHealthCheck != nil {
   437  				fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i)
   438  
   439  				// Validate the MachineDeployment MachineHealthCheck if defined.
   440  				if !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
   441  					allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace,
   442  						&md.MachineHealthCheck.MachineHealthCheckClass)...)
   443  				}
   444  
   445  				// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
   446  				// available either in the Cluster topology or in the ClusterClass.
   447  				// (One of these definitions will be used in the controller to create the MachineHealthCheck)
   448  				mdClass := machineDeploymentClassOfName(clusterClass, md.Class)
   449  				if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations.
   450  					// Check if the machineHealthCheck is explicitly enabled in the machineDeploymentTopology.
   451  					if md.MachineHealthCheck.Enable != nil && *md.MachineHealthCheck.Enable {
   452  						// Ensure the MHC is defined in at least one of the MachineDeploymentTopology of the Cluster or the MachineDeploymentClass of the ClusterClass.
   453  						if md.MachineHealthCheck.MachineHealthCheckClass.IsZero() && mdClass.MachineHealthCheck == nil {
   454  							allErrs = append(allErrs, field.Forbidden(
   455  								fldPath.Child("enable"),
   456  								fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable),
   457  							))
   458  						}
   459  					}
   460  				}
   461  			}
   462  		}
   463  	}
   464  
   465  	return allErrs
   466  }
   467  
   468  // machineDeploymentClassOfName find a MachineDeploymentClass of the given name in the provided ClusterClass.
   469  // Returns nil if it can not find one.
   470  // TODO: Check if there is already a helper function that can do this.
   471  func machineDeploymentClassOfName(clusterClass *clusterv1.ClusterClass, name string) *clusterv1.MachineDeploymentClass {
   472  	for _, mdClass := range clusterClass.Spec.Workers.MachineDeployments {
   473  		if mdClass.Class == name {
   474  			return &mdClass
   475  		}
   476  	}
   477  	return nil
   478  }
   479  
   480  // validateCIDRBlocks ensures the passed CIDR is valid.
   481  func validateCIDRBlocks(fldPath *field.Path, cidrs []string) field.ErrorList {
   482  	var allErrs field.ErrorList
   483  	for i, cidr := range cidrs {
   484  		if _, _, err := net.ParseCIDR(cidr); err != nil {
   485  			allErrs = append(allErrs, field.Invalid(
   486  				fldPath.Index(i),
   487  				cidr,
   488  				err.Error()))
   489  		}
   490  	}
   491  	return allErrs
   492  }
   493  
   494  // DefaultAndValidateVariables defaults and validates variables in the Cluster and MachineDeployment/MachinePool topologies based
   495  // on the definitions in the ClusterClass.
   496  func DefaultAndValidateVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList {
   497  	var allErrs field.ErrorList
   498  	allErrs = append(allErrs, DefaultVariables(cluster, clusterClass)...)
   499  
   500  	// Variables must be validated in the defaulting webhook. Variable definitions are stored in the ClusterClass status
   501  	// and are patched in the ClusterClass reconcile.
   502  	allErrs = append(allErrs, variables.ValidateClusterVariables(cluster.Spec.Topology.Variables, clusterClass.Status.Variables,
   503  		field.NewPath("spec", "topology", "variables"))...)
   504  	if cluster.Spec.Topology.Workers != nil {
   505  		for i, md := range cluster.Spec.Topology.Workers.MachineDeployments {
   506  			// Continue if there are no variable overrides.
   507  			if md.Variables == nil || len(md.Variables.Overrides) == 0 {
   508  				continue
   509  			}
   510  			allErrs = append(allErrs, variables.ValidateMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables,
   511  				field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides"))...)
   512  		}
   513  		for i, mp := range cluster.Spec.Topology.Workers.MachinePools {
   514  			// Continue if there are no variable overrides.
   515  			if mp.Variables == nil || len(mp.Variables.Overrides) == 0 {
   516  				continue
   517  			}
   518  			allErrs = append(allErrs, variables.ValidateMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables,
   519  				field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides"))...)
   520  		}
   521  	}
   522  	return allErrs
   523  }
   524  
   525  // DefaultVariables defaults variables in the Cluster based on information in the ClusterClass.
   526  func DefaultVariables(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList {
   527  	var allErrs field.ErrorList
   528  	if cluster == nil {
   529  		return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("Cluster can not be nil"))}
   530  	}
   531  	if clusterClass == nil {
   532  		return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("ClusterClass can not be nil"))}
   533  	}
   534  	defaultedVariables, errs := variables.DefaultClusterVariables(cluster.Spec.Topology.Variables, clusterClass.Status.Variables,
   535  		field.NewPath("spec", "topology", "variables"))
   536  	if len(errs) > 0 {
   537  		allErrs = append(allErrs, errs...)
   538  	} else {
   539  		cluster.Spec.Topology.Variables = defaultedVariables
   540  	}
   541  
   542  	if cluster.Spec.Topology.Workers != nil {
   543  		for i, md := range cluster.Spec.Topology.Workers.MachineDeployments {
   544  			// Continue if there are no variable overrides.
   545  			if md.Variables == nil || len(md.Variables.Overrides) == 0 {
   546  				continue
   547  			}
   548  			defaultedVariables, errs := variables.DefaultMachineVariables(md.Variables.Overrides, clusterClass.Status.Variables,
   549  				field.NewPath("spec", "topology", "workers", "machineDeployments").Index(i).Child("variables", "overrides"))
   550  			if len(errs) > 0 {
   551  				allErrs = append(allErrs, errs...)
   552  			} else {
   553  				md.Variables.Overrides = defaultedVariables
   554  			}
   555  		}
   556  		for i, mp := range cluster.Spec.Topology.Workers.MachinePools {
   557  			// Continue if there are no variable overrides.
   558  			if mp.Variables == nil || len(mp.Variables.Overrides) == 0 {
   559  				continue
   560  			}
   561  			defaultedVariables, errs := variables.DefaultMachineVariables(mp.Variables.Overrides, clusterClass.Status.Variables,
   562  				field.NewPath("spec", "topology", "workers", "machinePools").Index(i).Child("variables", "overrides"))
   563  			if len(errs) > 0 {
   564  				allErrs = append(allErrs, errs...)
   565  			} else {
   566  				mp.Variables.Overrides = defaultedVariables
   567  			}
   568  		}
   569  	}
   570  	return allErrs
   571  }
   572  
   573  // ValidateClusterForClusterClass uses information in the ClusterClass to validate the Cluster.
   574  func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList {
   575  	var allErrs field.ErrorList
   576  	if cluster == nil {
   577  		return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("Cluster can not be nil"))}
   578  	}
   579  	if clusterClass == nil {
   580  		return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("ClusterClass can not be nil"))}
   581  	}
   582  	allErrs = append(allErrs, check.MachineDeploymentTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...)
   583  
   584  	allErrs = append(allErrs, check.MachinePoolTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...)
   585  
   586  	// Validate the MachineHealthChecks defined in the cluster topology.
   587  	allErrs = append(allErrs, validateMachineHealthChecks(cluster, clusterClass)...)
   588  	return allErrs
   589  }
   590  
   591  // validateClusterClassExistsAndIsReconciled will try to get the ClusterClass referenced in the Cluster. If it does not exist or is not reconciled it will add a warning.
   592  // In any other case it will return an error.
   593  func (webhook *Cluster) validateClusterClassExistsAndIsReconciled(ctx context.Context, newCluster *clusterv1.Cluster) (*clusterv1.ClusterClass, admission.Warnings, error) {
   594  	var allWarnings admission.Warnings
   595  	clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster)
   596  	if clusterClassPollErr != nil {
   597  		// Add a warning if the Class does not exist or if it has not been successfully reconciled.
   598  		switch {
   599  		case apierrors.IsNotFound(clusterClassPollErr):
   600  			allWarnings = append(allWarnings,
   601  				fmt.Sprintf(
   602  					"Cluster refers to ClusterClass %s in the topology but it does not exist. "+
   603  						"Cluster topology has not been fully validated. "+
   604  						"The ClusterClass must be created to reconcile the Cluster", newCluster.Spec.Topology.Class),
   605  			)
   606  		case errors.Is(clusterClassPollErr, errClusterClassNotReconciled):
   607  			allWarnings = append(allWarnings,
   608  				fmt.Sprintf(
   609  					"Cluster refers to ClusterClass %s but this object which hasn't yet been reconciled. "+
   610  						"Cluster topology has not been fully validated. ", newCluster.Spec.Topology.Class),
   611  			)
   612  		// If there's any other error return a generic warning with the error message.
   613  		default:
   614  			allWarnings = append(allWarnings,
   615  				fmt.Sprintf(
   616  					"Cluster refers to ClusterClass %s in the topology but it could not be retrieved. "+
   617  						"Cluster topology has not been fully validated: %s", newCluster.Spec.Topology.Class, clusterClassPollErr.Error()),
   618  			)
   619  		}
   620  	}
   621  	return clusterClass, allWarnings, clusterClassPollErr
   622  }
   623  
   624  // pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds.
   625  func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) {
   626  	clusterClass := &clusterv1.ClusterClass{}
   627  	var clusterClassPollErr error
   628  	_ = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) {
   629  		if clusterClassPollErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassPollErr != nil {
   630  			return false, nil //nolint:nilerr
   631  		}
   632  
   633  		if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil {
   634  			return false, nil //nolint:nilerr
   635  		}
   636  		clusterClassPollErr = nil
   637  		return true, nil
   638  	})
   639  	if clusterClassPollErr != nil {
   640  		return nil, clusterClassPollErr
   641  	}
   642  	return clusterClass, nil
   643  }
   644  
   645  // clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the
   646  // ClusterClass variables have not been successfully reconciled.
   647  func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error {
   648  	// If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled.
   649  	if clusterClass.Generation != clusterClass.Status.ObservedGeneration {
   650  		return errClusterClassNotReconciled
   651  	}
   652  	// If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled.
   653  	if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) ||
   654  		conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) {
   655  		return errClusterClassNotReconciled
   656  	}
   657  	return nil
   658  }
   659  
   660  func validateTopologyMetadata(topology *clusterv1.Topology, fldPath *field.Path) field.ErrorList {
   661  	var allErrs field.ErrorList
   662  	allErrs = append(allErrs, topology.ControlPlane.Metadata.Validate(fldPath.Child("controlPlane", "metadata"))...)
   663  	if topology.Workers != nil {
   664  		for idx, md := range topology.Workers.MachineDeployments {
   665  			allErrs = append(allErrs, md.Metadata.Validate(
   666  				fldPath.Child("workers", "machineDeployments").Index(idx).Child("metadata"),
   667  			)...)
   668  		}
   669  		for idx, mp := range topology.Workers.MachinePools {
   670  			allErrs = append(allErrs, mp.Metadata.Validate(
   671  				fldPath.Child("workers", "machinePools").Index(idx).Child("metadata"),
   672  			)...)
   673  		}
   674  	}
   675  	return allErrs
   676  }