sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/controllers/controller.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/blang/semver/v4"
    26  	"github.com/pkg/errors"
    27  	corev1 "k8s.io/api/core/v1"
    28  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    31  	"k8s.io/client-go/tools/record"
    32  	"k8s.io/klog/v2"
    33  	"k8s.io/utils/ptr"
    34  	ctrl "sigs.k8s.io/controller-runtime"
    35  	"sigs.k8s.io/controller-runtime/pkg/builder"
    36  	"sigs.k8s.io/controller-runtime/pkg/client"
    37  	"sigs.k8s.io/controller-runtime/pkg/controller"
    38  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    39  	"sigs.k8s.io/controller-runtime/pkg/handler"
    40  
    41  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    42  	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
    43  	"sigs.k8s.io/cluster-api/controllers/remote"
    44  	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
    45  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
    46  	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    47  	"sigs.k8s.io/cluster-api/feature"
    48  	"sigs.k8s.io/cluster-api/internal/contract"
    49  	"sigs.k8s.io/cluster-api/internal/util/ssa"
    50  	"sigs.k8s.io/cluster-api/util"
    51  	"sigs.k8s.io/cluster-api/util/annotations"
    52  	"sigs.k8s.io/cluster-api/util/collections"
    53  	"sigs.k8s.io/cluster-api/util/conditions"
    54  	"sigs.k8s.io/cluster-api/util/patch"
    55  	"sigs.k8s.io/cluster-api/util/predicates"
    56  	"sigs.k8s.io/cluster-api/util/secret"
    57  	"sigs.k8s.io/cluster-api/util/version"
    58  )
    59  
    60  const (
    61  	kcpManagerName          = "capi-kubeadmcontrolplane"
    62  	kubeadmControlPlaneKind = "KubeadmControlPlane"
    63  )
    64  
    65  // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
    66  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch
    67  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io;controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
    68  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
    69  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;create;update;patch;delete
    70  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=list
    71  // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
    72  
    73  // KubeadmControlPlaneReconciler reconciles a KubeadmControlPlane object.
    74  type KubeadmControlPlaneReconciler struct {
    75  	Client              client.Client
    76  	SecretCachingClient client.Client
    77  	controller          controller.Controller
    78  	recorder            record.EventRecorder
    79  	Tracker             *remote.ClusterCacheTracker
    80  
    81  	EtcdDialTimeout time.Duration
    82  	EtcdCallTimeout time.Duration
    83  
    84  	// WatchFilterValue is the label value used to filter events prior to reconciliation.
    85  	WatchFilterValue string
    86  
    87  	managementCluster         internal.ManagementCluster
    88  	managementClusterUncached internal.ManagementCluster
    89  	ssaCache                  ssa.Cache
    90  }
    91  
    92  func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    93  	c, err := ctrl.NewControllerManagedBy(mgr).
    94  		For(&controlplanev1.KubeadmControlPlane{}).
    95  		Owns(&clusterv1.Machine{}).
    96  		WithOptions(options).
    97  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
    98  		Watches(
    99  			&clusterv1.Cluster{},
   100  			handler.EnqueueRequestsFromMapFunc(r.ClusterToKubeadmControlPlane),
   101  			builder.WithPredicates(
   102  				predicates.All(ctrl.LoggerFrom(ctx),
   103  					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
   104  					predicates.ClusterUnpausedAndInfrastructureReady(ctrl.LoggerFrom(ctx)),
   105  				),
   106  			),
   107  		).Build(r)
   108  	if err != nil {
   109  		return errors.Wrap(err, "failed setting up with a controller manager")
   110  	}
   111  
   112  	r.controller = c
   113  	r.recorder = mgr.GetEventRecorderFor("kubeadmcontrolplane-controller")
   114  	r.ssaCache = ssa.NewCache()
   115  
   116  	if r.managementCluster == nil {
   117  		if r.Tracker == nil {
   118  			return errors.New("cluster cache tracker is nil, cannot create the internal management cluster resource")
   119  		}
   120  		r.managementCluster = &internal.Management{
   121  			Client:              r.Client,
   122  			SecretCachingClient: r.SecretCachingClient,
   123  			Tracker:             r.Tracker,
   124  			EtcdDialTimeout:     r.EtcdDialTimeout,
   125  			EtcdCallTimeout:     r.EtcdCallTimeout,
   126  		}
   127  	}
   128  
   129  	if r.managementClusterUncached == nil {
   130  		r.managementClusterUncached = &internal.Management{Client: mgr.GetAPIReader()}
   131  	}
   132  
   133  	return nil
   134  }
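
        // A minimal wiring sketch for the reconciler above (illustrative only, not part of the
        // original file): mgr, tracker and secretCachingClient are assumed to be constructed
        // elsewhere (e.g. in the provider's main.go), and the timeout values are only examples.
        //
        //	r := &KubeadmControlPlaneReconciler{
        //		Client:              mgr.GetClient(),
        //		SecretCachingClient: secretCachingClient,
        //		Tracker:             tracker,
        //		EtcdDialTimeout:     10 * time.Second,
        //		EtcdCallTimeout:     45 * time.Second,
        //	}
        //	if err := r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil {
        //		// handle the setup error
        //	}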
   135  
   136  func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, reterr error) {
   137  	log := ctrl.LoggerFrom(ctx)
   138  
   139  	// Fetch the KubeadmControlPlane instance.
   140  	kcp := &controlplanev1.KubeadmControlPlane{}
   141  	if err := r.Client.Get(ctx, req.NamespacedName, kcp); err != nil {
   142  		if apierrors.IsNotFound(err) {
   143  			return ctrl.Result{}, nil
   144  		}
   145  		return ctrl.Result{Requeue: true}, nil
   146  	}
   147  
   148  	// Fetch the Cluster.
   149  	cluster, err := util.GetOwnerCluster(ctx, r.Client, kcp.ObjectMeta)
   150  	if err != nil {
   151  		log.Error(err, "Failed to retrieve owner Cluster from the API Server")
   152  		return ctrl.Result{}, err
   153  	}
   154  	if cluster == nil {
   155  		log.Info("Cluster Controller has not yet set OwnerRef")
   156  		return ctrl.Result{}, nil
   157  	}
   158  	log = log.WithValues("Cluster", klog.KObj(cluster))
   159  	ctx = ctrl.LoggerInto(ctx, log)
   160  
   161  	if annotations.IsPaused(cluster, kcp) {
   162  		log.Info("Reconciliation is paused for this object")
   163  		return ctrl.Result{}, nil
   164  	}
   165  
   166  	// Initialize the patch helper.
   167  	patchHelper, err := patch.NewHelper(kcp, r.Client)
   168  	if err != nil {
   169  		log.Error(err, "Failed to configure the patch helper")
   170  		return ctrl.Result{Requeue: true}, nil
   171  	}
   172  
   173  	// Add finalizer first if not set to avoid the race condition between init and delete.
   174  	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
   175  	if kcp.ObjectMeta.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer) {
   176  		controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer)
   177  
   178  		// Patch and return right away instead of reusing the main defer,
   179  		// because the main defer may take too much time to get cluster status.
   180  		// Patch ObservedGeneration only if the reconciliation completed successfully.
   181  		patchOpts := []patch.Option{patch.WithStatusObservedGeneration{}}
   182  		if err := patchHelper.Patch(ctx, kcp, patchOpts...); err != nil {
   183  			return ctrl.Result{}, errors.Wrapf(err, "failed to add finalizer")
   184  		}
   185  
   186  		return ctrl.Result{}, nil
   187  	}
   188  
   189  	// Initialize the control plane scope; this also includes checking for orphan machines and
   190  	// adopting them if necessary.
   191  	controlPlane, adoptableMachineFound, err := r.initControlPlaneScope(ctx, cluster, kcp)
   192  	if err != nil {
   193  		return ctrl.Result{}, err
   194  	}
   195  	if adoptableMachineFound {
   196  		// if there are no errors but at least one CP machine has been adopted, then requeue and
   197  		// wait for the update event for the ownership to be set.
   198  		return ctrl.Result{}, nil
   199  	}
   200  
   201  	defer func() {
   202  		// Always attempt to update status.
   203  		if err := r.updateStatus(ctx, controlPlane); err != nil {
   204  			var connFailure *internal.RemoteClusterConnectionError
   205  			if errors.As(err, &connFailure) {
   206  				log.Info("Could not connect to workload cluster to fetch status", "err", err.Error())
   207  			} else {
   208  				log.Error(err, "Failed to update KubeadmControlPlane Status")
   209  				reterr = kerrors.NewAggregate([]error{reterr, err})
   210  			}
   211  		}
   212  
   213  		// Always attempt to Patch the KubeadmControlPlane object and status after each reconciliation.
   214  		if err := patchKubeadmControlPlane(ctx, patchHelper, kcp); err != nil {
   215  			log.Error(err, "Failed to patch KubeadmControlPlane")
   216  			reterr = kerrors.NewAggregate([]error{reterr, err})
   217  		}
   218  
   219  		// Only requeue if there is no error, no Requeue or RequeueAfter is already set, and the object does not have a deletion timestamp.
   220  		if reterr == nil && res.IsZero() && kcp.ObjectMeta.DeletionTimestamp.IsZero() {
   221  			// Make KCP requeue in case node status is not ready, so we can check for node status without waiting for a full
   222  			// resync (by default 10 minutes).
   223  			// The alternative solution would be to watch the control plane nodes in the Cluster - similar to how the
   224  			// MachineSet and MachineHealthCheck controllers watch the nodes under their control.
   225  			if !kcp.Status.Ready {
   226  				res = ctrl.Result{RequeueAfter: 20 * time.Second}
   227  			}
   228  
   229  			// Make KCP requeue if ControlPlaneComponentsHealthyCondition is false so we can check for control plane component
   230  			// status without waiting for a full resync (by default 10 minutes).
   231  			// Otherwise this condition can lead to a delay in provisioning MachineDeployments when MachineSet preflight checks are enabled.
   232  			// The alternative solution to this requeue would be watching the relevant pods inside each workload cluster which would be very expensive.
   233  			if conditions.IsFalse(kcp, controlplanev1.ControlPlaneComponentsHealthyCondition) {
   234  				res = ctrl.Result{RequeueAfter: 20 * time.Second}
   235  			}
   236  		}
   237  	}()
   238  
   239  	if !kcp.ObjectMeta.DeletionTimestamp.IsZero() {
   240  		// Handle deletion reconciliation loop.
   241  		res, err = r.reconcileDelete(ctx, controlPlane)
   242  		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
   243  		// the current cluster because of concurrent access.
   244  		if errors.Is(err, remote.ErrClusterLocked) {
   245  			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   246  			return ctrl.Result{RequeueAfter: time.Minute}, nil
   247  		}
   248  		return res, err
   249  	}
   250  
   251  	// Handle normal reconciliation loop.
   252  	res, err = r.reconcile(ctx, controlPlane)
   253  	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
   254  	// the current cluster because of concurrent access.
   255  	if errors.Is(err, remote.ErrClusterLocked) {
   256  		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   257  		return ctrl.Result{RequeueAfter: time.Minute}, nil
   258  	}
   259  	return res, err
   260  }
   261  
   262  // initControlPlaneScope initializes the control plane scope; this also includes checking for orphan machines and
   263  // adopting them if necessary.
   264  // The func also returns a boolean indicating if adoptable machines have been found and processed, but this doesn't imply those machines
   265  // have actually been adopted.
   266  func (r *KubeadmControlPlaneReconciler) initControlPlaneScope(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (*internal.ControlPlane, bool, error) {
   267  	log := ctrl.LoggerFrom(ctx)
   268  
   269  	// Return early if the cluster is not yet in a state where control plane machines exist.
   270  	if !cluster.Status.InfrastructureReady || !cluster.Spec.ControlPlaneEndpoint.IsValid() {
   271  		controlPlane, err := internal.NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, collections.Machines{})
   272  		if err != nil {
   273  			log.Error(err, "failed to initialize control plane scope")
   274  			return nil, false, err
   275  		}
   276  		return controlPlane, false, nil
   277  	}
   278  
   279  	// Read control plane machines
   280  	controlPlaneMachines, err := r.managementClusterUncached.GetMachinesForCluster(ctx, cluster, collections.ControlPlaneMachines(cluster.Name))
   281  	if err != nil {
   282  		log.Error(err, "failed to retrieve control plane machines for cluster")
   283  		return nil, false, err
   284  	}
   285  
   286  	// If we are not deleting the CP, adopt standalone CP machines, if any.
   287  	adoptableMachines := controlPlaneMachines.Filter(collections.AdoptableControlPlaneMachines(cluster.Name))
   288  	if kcp.ObjectMeta.DeletionTimestamp.IsZero() && len(adoptableMachines) > 0 {
   289  		return nil, true, r.adoptMachines(ctx, kcp, adoptableMachines, cluster)
   290  	}
   291  
   292  	ownedMachines := controlPlaneMachines.Filter(collections.OwnedMachines(kcp))
   293  	if kcp.ObjectMeta.DeletionTimestamp.IsZero() && len(ownedMachines) != len(controlPlaneMachines) {
   294  		err := errors.New("not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode")
   295  		log.Error(err, "KCP cannot reconcile")
   296  		return nil, false, err
   297  	}
   298  
   299  	controlPlane, err := internal.NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, ownedMachines)
   300  	if err != nil {
   301  		log.Error(err, "failed to initialize control plane scope")
   302  		return nil, false, err
   303  	}
   304  	return controlPlane, false, nil
   305  }
   306  
   307  func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kcp *controlplanev1.KubeadmControlPlane) error {
   308  	// Always update the readyCondition by summarizing the state of other conditions.
   309  	conditions.SetSummary(kcp,
   310  		conditions.WithConditions(
   311  			controlplanev1.MachinesCreatedCondition,
   312  			controlplanev1.MachinesSpecUpToDateCondition,
   313  			controlplanev1.ResizedCondition,
   314  			controlplanev1.MachinesReadyCondition,
   315  			controlplanev1.AvailableCondition,
   316  			controlplanev1.CertificatesAvailableCondition,
   317  		),
   318  	)
   319  
   320  	// Patch the object, ignoring conflicts on the conditions owned by this controller.
   321  	return patchHelper.Patch(
   322  		ctx,
   323  		kcp,
   324  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   325  			controlplanev1.MachinesCreatedCondition,
   326  			clusterv1.ReadyCondition,
   327  			controlplanev1.MachinesSpecUpToDateCondition,
   328  			controlplanev1.ResizedCondition,
   329  			controlplanev1.MachinesReadyCondition,
   330  			controlplanev1.AvailableCondition,
   331  			controlplanev1.CertificatesAvailableCondition,
   332  		}},
   333  		patch.WithStatusObservedGeneration{},
   334  	)
   335  }
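
        // Illustrative effect of the summary above (a sketch, not generated output): if any of the
        // conditions listed in conditions.WithConditions is False (for example Resized while a
        // scale-up is in progress), the summarized Ready condition on the KubeadmControlPlane is
        // also reported as False; once all of the listed conditions report True, Ready flips back
        // to True on the next patch.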
   336  
   337  // reconcile handles KubeadmControlPlane reconciliation.
   338  func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPlane *internal.ControlPlane) (res ctrl.Result, reterr error) {
   339  	log := ctrl.LoggerFrom(ctx)
   340  	log.Info("Reconcile KubeadmControlPlane")
   341  
   342  	// Make sure to reconcile the external infrastructure reference.
   343  	if err := r.reconcileExternalReference(ctx, controlPlane.Cluster, &controlPlane.KCP.Spec.MachineTemplate.InfrastructureRef); err != nil {
   344  		return ctrl.Result{}, err
   345  	}
   346  
   347  	// Wait for the cluster infrastructure to be ready before creating machines
   348  	if !controlPlane.Cluster.Status.InfrastructureReady {
   349  		log.Info("Cluster infrastructure is not ready yet")
   350  		return ctrl.Result{}, nil
   351  	}
   352  
   353  	// Reconcile cluster certificates.
   354  	if err := r.reconcileClusterCertificates(ctx, controlPlane); err != nil {
   355  		return ctrl.Result{}, err
   356  	}
   357  
   358  	// If ControlPlaneEndpoint is not set, return early
   359  	if !controlPlane.Cluster.Spec.ControlPlaneEndpoint.IsValid() {
   360  		log.Info("Cluster does not yet have a ControlPlaneEndpoint defined")
   361  		return ctrl.Result{}, nil
   362  	}
   363  
   364  	// Generate Cluster Kubeconfig if needed
   365  	if result, err := r.reconcileKubeconfig(ctx, controlPlane); !result.IsZero() || err != nil {
   366  		if err != nil {
   367  			log.Error(err, "failed to reconcile Kubeconfig")
   368  		}
   369  		return result, err
   370  	}
   371  
   372  	if err := r.syncMachines(ctx, controlPlane); err != nil {
   373  		return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines")
   374  	}
   375  
   376  	// Aggregate the operational state of all the machines; while aggregating we are adding the
   377  	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
   378  	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())
   379  
   380  	// Updates conditions reporting the status of static pods and the status of the etcd cluster.
   381  	// NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution.
   382  	if err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil {
   383  		return ctrl.Result{}, err
   384  	}
   385  
   386  	// Ensures the number of etcd members is in sync with the number of machines/nodes.
   387  	// NOTE: This is usually required after a machine deletion.
   388  	if err := r.reconcileEtcdMembers(ctx, controlPlane); err != nil {
   389  		return ctrl.Result{}, err
   390  	}
   391  
   392  	// Reconcile unhealthy machines by triggering deletion and requeue if it is considered safe to remediate,
   393  	// otherwise continue with the other KCP operations.
   394  	if result, err := r.reconcileUnhealthyMachines(ctx, controlPlane); err != nil || !result.IsZero() {
   395  		return result, err
   396  	}
   397  
   398  	// Rollout of control plane machines due to configuration changes (e.g. upgrades) takes precedence over other operations.
   399  	machinesNeedingRollout, rolloutReasons := controlPlane.MachinesNeedingRollout()
   400  	switch {
   401  	case len(machinesNeedingRollout) > 0:
   402  		var reasons []string
   403  		for _, rolloutReason := range rolloutReasons {
   404  			reasons = append(reasons, rolloutReason)
   405  		}
   406  		log.Info(fmt.Sprintf("Rolling out Control Plane machines: %s", strings.Join(reasons, ",")), "machinesNeedingRollout", machinesNeedingRollout.Names())
   407  		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(machinesNeedingRollout), len(controlPlane.Machines)-len(machinesNeedingRollout))
   408  		return r.upgradeControlPlane(ctx, controlPlane, machinesNeedingRollout)
   409  	default:
   410  		// Make sure the last upgrade operation is marked as completed.
   411  		// NOTE: we are checking that the condition already exists in order to avoid setting this condition at the first
   412  		// reconciliation/before a rolling upgrade actually starts.
   413  		if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) {
   414  			conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition)
   415  		}
   416  	}
   417  
   418  	// If we've made it this far, we can assume that all ownedMachines are up to date
   419  	numMachines := len(controlPlane.Machines)
   420  	desiredReplicas := int(*controlPlane.KCP.Spec.Replicas)
   421  
   422  	switch {
   423  	// We are creating the first replica
   424  	case numMachines < desiredReplicas && numMachines == 0:
   425  		// Create new Machine w/ init
   426  		log.Info("Initializing control plane", "Desired", desiredReplicas, "Existing", numMachines)
   427  		conditions.MarkFalse(controlPlane.KCP, controlplanev1.AvailableCondition, controlplanev1.WaitingForKubeadmInitReason, clusterv1.ConditionSeverityInfo, "")
   428  		return r.initializeControlPlane(ctx, controlPlane)
   429  	// We are scaling up
   430  	case numMachines < desiredReplicas && numMachines > 0:
   431  		// Create a new Machine w/ join
   432  		log.Info("Scaling up control plane", "Desired", desiredReplicas, "Existing", numMachines)
   433  		return r.scaleUpControlPlane(ctx, controlPlane)
   434  	// We are scaling down
   435  	case numMachines > desiredReplicas:
   436  		log.Info("Scaling down control plane", "Desired", desiredReplicas, "Existing", numMachines)
   437  		// The last parameter (i.e. machines needing to be rolled out) should always be empty here.
   438  		return r.scaleDownControlPlane(ctx, controlPlane, collections.Machines{})
   439  	}
   440  
   441  	// Get the workload cluster client.
   442  	workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
   443  	if err != nil {
   444  		log.V(2).Info("cannot get remote client to workload cluster, will requeue", "cause", err)
   445  		return ctrl.Result{Requeue: true}, nil
   446  	}
   447  
   448  	// Ensure kubeadm role bindings for v1.18+
   449  	if err := workloadCluster.AllowBootstrapTokensToGetNodes(ctx); err != nil {
   450  		return ctrl.Result{}, errors.Wrap(err, "failed to set role and role binding for kubeadm")
   451  	}
   452  
   453  	// We intentionally only parse major/minor/patch so that the subsequent code
   454  	// also already applies to beta versions of new releases.
   455  	parsedVersion, err := version.ParseMajorMinorPatchTolerant(controlPlane.KCP.Spec.Version)
   456  	if err != nil {
   457  		return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version)
   458  	}
   459  
   460  	// Update kube-proxy daemonset.
   461  	if err := workloadCluster.UpdateKubeProxyImageInfo(ctx, controlPlane.KCP, parsedVersion); err != nil {
   462  		log.Error(err, "failed to update kube-proxy daemonset")
   463  		return ctrl.Result{}, err
   464  	}
   465  
   466  	// Update CoreDNS deployment.
   467  	if err := workloadCluster.UpdateCoreDNS(ctx, controlPlane.KCP, parsedVersion); err != nil {
   468  		return ctrl.Result{}, errors.Wrap(err, "failed to update CoreDNS deployment")
   469  	}
   470  
   471  	// Reconcile certificate expiry for Machines that don't have the expiry annotation on KubeadmConfig yet.
   472  	// Note: This requires that all control plane machines are working. We moved this to the end of the reconcile
   473  	// as nothing in the same reconcile depends on it and to ensure it doesn't block anything else,
   474  	// especially MHC remediation and rollout of changes to recover the control plane.
   475  	if err := r.reconcileCertificateExpiries(ctx, controlPlane); err != nil {
   476  		return ctrl.Result{}, err
   477  	}
   478  	return ctrl.Result{}, nil
   479  }
   480  
   481  // reconcileClusterCertificates ensures that all the cluster certificates exist and
   482  // enforces the expected owner refs on them.
   483  func (r *KubeadmControlPlaneReconciler) reconcileClusterCertificates(ctx context.Context, controlPlane *internal.ControlPlane) error {
   484  	log := ctrl.LoggerFrom(ctx)
   485  
   486  	// Generate Cluster Certificates if needed
   487  	config := controlPlane.KCP.Spec.KubeadmConfigSpec.DeepCopy()
   488  	config.JoinConfiguration = nil
   489  	if config.ClusterConfiguration == nil {
   490  		config.ClusterConfiguration = &bootstrapv1.ClusterConfiguration{}
   491  	}
   492  	certificates := secret.NewCertificatesForInitialControlPlane(config.ClusterConfiguration)
   493  	controllerRef := metav1.NewControllerRef(controlPlane.KCP, controlplanev1.GroupVersion.WithKind(kubeadmControlPlaneKind))
   494  	if err := certificates.LookupOrGenerateCached(ctx, r.SecretCachingClient, r.Client, util.ObjectKey(controlPlane.Cluster), *controllerRef); err != nil {
   495  		log.Error(err, "unable to lookup or create cluster certificates")
   496  		conditions.MarkFalse(controlPlane.KCP, controlplanev1.CertificatesAvailableCondition, controlplanev1.CertificatesGenerationFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
   497  		return err
   498  	}
   499  
   500  	if err := r.ensureCertificatesOwnerRef(ctx, certificates, *controllerRef); err != nil {
   501  		return err
   502  	}
   503  
   504  	conditions.MarkTrue(controlPlane.KCP, controlplanev1.CertificatesAvailableCondition)
   505  	return nil
   506  }
   507  
   508  // reconcileDelete handles KubeadmControlPlane deletion.
   509  // The implementation does not take non-control plane workloads into consideration. This may or may not change in the future.
   510  // Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064.
   511  func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
   512  	log := ctrl.LoggerFrom(ctx)
   513  	log.Info("Reconcile KubeadmControlPlane deletion")
   514  
   515  	// If no control plane machines remain, remove the finalizer
   516  	if len(controlPlane.Machines) == 0 {
   517  		controllerutil.RemoveFinalizer(controlPlane.KCP, controlplanev1.KubeadmControlPlaneFinalizer)
   518  		return ctrl.Result{}, nil
   519  	}
   520  
   521  	// Updates conditions reporting the status of static pods and the status of the etcd cluster.
   522  	// NOTE: Ignoring failures given that we are deleting
   523  	if err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil {
   524  		log.Info("failed to reconcile conditions", "error", err.Error())
   525  	}
   526  
   527  	// Aggregate the operational state of all the machines; while aggregating we are adding the
   528  	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
   529  	// However, during delete we are hiding the counter (1 of x) because it does not make sense given that
   530  	// all the machines are deleted in parallel.
   531  	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef())
   532  
   533  	// Gets all machines, not just control plane machines.
   534  	allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, controlPlane.Cluster)
   535  	if err != nil {
   536  		return ctrl.Result{}, err
   537  	}
   538  
   539  	allMachinePools := &expv1.MachinePoolList{}
   540  	// Get all machine pools.
   541  	if feature.Gates.Enabled(feature.MachinePool) {
   542  		allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, controlPlane.Cluster)
   543  		if err != nil {
   544  			return ctrl.Result{}, err
   545  		}
   546  	}
   547  	// Verify that only control plane machines remain
   548  	if len(allMachines) != len(controlPlane.Machines) || len(allMachinePools.Items) != 0 {
   549  		log.Info("Waiting for worker nodes to be deleted first")
   550  		conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")
   551  		return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
   552  	}
   553  
   554  	// Delete control plane machines in parallel
   555  	machinesToDelete := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
   556  	var errs []error
   557  	for i := range machinesToDelete {
   558  		m := machinesToDelete[i]
   559  		logger := log.WithValues("Machine", klog.KObj(m))
   560  		if err := r.Client.Delete(ctx, machinesToDelete[i]); err != nil && !apierrors.IsNotFound(err) {
   561  			logger.Error(err, "Failed to cleanup owned machine")
   562  			errs = append(errs, err)
   563  		}
   564  	}
   565  	if len(errs) > 0 {
   566  		err := kerrors.NewAggregate(errs)
   567  		r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedDelete",
   568  			"Failed to delete control plane Machines for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)
   569  		return ctrl.Result{}, err
   570  	}
   571  	conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
   572  	return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
   573  }
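
        // Illustrative deletion ordering (a sketch): for a Cluster that still has worker Machines or
        // MachinePools, the func above keeps requeueing with deleteRequeueAfter until only control
        // plane Machines remain; those are then deleted in parallel, and the finalizer is removed on
        // a later pass once no control plane Machines are left.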
   574  
   575  // ClusterToKubeadmControlPlane is a handler.MapFunc to be used to enqueue requests for reconciliation
   576  // for KubeadmControlPlane based on updates to a Cluster.
   577  func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(_ context.Context, o client.Object) []ctrl.Request {
   578  	c, ok := o.(*clusterv1.Cluster)
   579  	if !ok {
   580  		panic(fmt.Sprintf("Expected a Cluster but got a %T", o))
   581  	}
   582  
   583  	controlPlaneRef := c.Spec.ControlPlaneRef
   584  	if controlPlaneRef != nil && controlPlaneRef.Kind == kubeadmControlPlaneKind {
   585  		return []ctrl.Request{{NamespacedName: client.ObjectKey{Namespace: controlPlaneRef.Namespace, Name: controlPlaneRef.Name}}}
   586  	}
   587  
   588  	return nil
   589  }
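
        // Illustrative use of the mapping above (a sketch; object names are made up): a Cluster whose
        // spec.controlPlaneRef points at a KubeadmControlPlane yields exactly one request for that
        // object, and any other Cluster yields none.
        //
        //	cluster := &clusterv1.Cluster{
        //		ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "my-cluster"},
        //		Spec: clusterv1.ClusterSpec{
        //			ControlPlaneRef: &corev1.ObjectReference{
        //				Kind:      "KubeadmControlPlane",
        //				Namespace: "default",
        //				Name:      "my-cluster-control-plane",
        //			},
        //		},
        //	}
        //	// r.ClusterToKubeadmControlPlane(ctx, cluster) returns:
        //	// []ctrl.Request{{NamespacedName: client.ObjectKey{Namespace: "default", Name: "my-cluster-control-plane"}}}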
   590  
   591  // syncMachines updates Machines, InfrastructureMachines and KubeadmConfigs to propagate in-place mutable fields from KCP.
   592  // Note: It also cleans up managed fields of all Machines so that Machines that were
   593  // created/patched before the controller adopted Server-Side-Apply (SSA) (< v1.4.0) can also work with SSA.
   594  // Note: For InfrastructureMachines and KubeadmConfigs it also drops ownership of "metadata.labels" and
   595  // "metadata.annotations" from "manager" so that "capi-kubeadmcontrolplane" can own these fields and can work with SSA.
   596  // Otherwise, fields would be co-owned by our "old" "manager" and "capi-kubeadmcontrolplane" and then we would not be
   597  // able to e.g. drop labels and annotations.
   598  func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *internal.ControlPlane) error {
   599  	patchHelpers := map[string]*patch.Helper{}
   600  	for machineName := range controlPlane.Machines {
   601  		m := controlPlane.Machines[machineName]
   602  		// If the machine is already being deleted, we don't need to update it.
   603  		if !m.DeletionTimestamp.IsZero() {
   604  			continue
   605  		}
   606  
   607  		// Cleanup managed fields of all Machines.
   608  		// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
   609  		// (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
   610  		// "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations.
   611  		if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, kcpManagerName); err != nil {
   612  			return errors.Wrapf(err, "failed to update Machine: failed to adjust the managedFields of the Machine %s", klog.KObj(m))
   613  		}
   614  		// Update Machine to propagate in-place mutable fields from KCP.
   615  		updatedMachine, err := r.updateMachine(ctx, m, controlPlane.KCP, controlPlane.Cluster)
   616  		if err != nil {
   617  			return errors.Wrapf(err, "failed to update Machine: %s", klog.KObj(m))
   618  		}
   619  		controlPlane.Machines[machineName] = updatedMachine
   620  		// Since the machine is updated, re-create the patch helper so that any subsequent
   621  		// Patch calls use the correct base machine object to calculate the diffs.
   622  		// Example: reconcileControlPlaneConditions patches the machine objects in a subsequent call
   623  		// and it should use the updated machine to calculate the diff.
   624  		// Note: If the patchHelpers are not re-computed based on the new updated machines, subsequent
   625  		// Patch calls will fail because the patch will be calculated based on an outdated machine and will error
   626  		// because of outdated resourceVersion.
   627  		// TODO: This should be cleaned-up to have a more streamline way of constructing and using patchHelpers.
   628  		patchHelper, err := patch.NewHelper(updatedMachine, r.Client)
   629  		if err != nil {
   630  			return err
   631  		}
   632  		patchHelpers[machineName] = patchHelper
   633  
   634  		labelsAndAnnotationsManagedFieldPaths := []contract.Path{
   635  			{"f:metadata", "f:annotations"},
   636  			{"f:metadata", "f:labels"},
   637  		}
   638  		infraMachine, infraMachineFound := controlPlane.InfraResources[machineName]
   639  		// Only update the InfraMachine if it is already found, otherwise just skip it.
   640  		// This could happen e.g. if the cache is not up-to-date yet.
   641  		if infraMachineFound {
   642  			// Cleanup managed fields of all InfrastructureMachines to drop ownership of labels and annotations
   643  			// from "manager". We do this so that InfrastructureMachines that are created using the Create method
   644  			// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   645  			// and "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations.
   646  			if err := ssa.DropManagedFields(ctx, r.Client, infraMachine, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   647  				return errors.Wrapf(err, "failed to clean up managedFields of InfrastructureMachine %s", klog.KObj(infraMachine))
   648  			}
   649  			// Update in-place mutating fields on InfrastructureMachine.
   650  			if err := r.updateExternalObject(ctx, infraMachine, controlPlane.KCP, controlPlane.Cluster); err != nil {
   651  				return errors.Wrapf(err, "failed to update InfrastructureMachine %s", klog.KObj(infraMachine))
   652  			}
   653  		}
   654  
   655  		kubeadmConfig, kubeadmConfigFound := controlPlane.KubeadmConfigs[machineName]
   656  		// Only update the KubeadmConfig if it is already found, otherwise just skip it.
   657  		// This could happen e.g. if the cache is not up-to-date yet.
   658  		if kubeadmConfigFound {
   659  			// Note: Set the GroupVersionKind because updateExternalObject depends on it.
   660  			kubeadmConfig.SetGroupVersionKind(m.Spec.Bootstrap.ConfigRef.GroupVersionKind())
   661  			// Cleanup managed fields of all KubeadmConfigs to drop ownership of labels and annotations
   662  			// from "manager". We do this so that KubeadmConfigs that are created using the Create method
   663  			// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   664  			// and "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations.
   665  			if err := ssa.DropManagedFields(ctx, r.Client, kubeadmConfig, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   666  				return errors.Wrapf(err, "failed to clean up managedFields of KubeadmConfig %s", klog.KObj(kubeadmConfig))
   667  			}
   668  			// Update in-place mutating fields on BootstrapConfig.
   669  			if err := r.updateExternalObject(ctx, kubeadmConfig, controlPlane.KCP, controlPlane.Cluster); err != nil {
   670  				return errors.Wrapf(err, "failed to update KubeadmConfig %s", klog.KObj(kubeadmConfig))
   671  			}
   672  		}
   673  	}
   674  	// Update the patch helpers.
   675  	controlPlane.SetPatchHelpers(patchHelpers)
   676  	return nil
   677  }
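
        // Illustrative managedFields shape that the SSA cleanup above aims for (a sketch; real
        // entries on a live object will differ): in-place mutable metadata on the owned objects
        // ends up owned by the "capi-kubeadmcontrolplane" field manager via Apply, e.g.:
        //
        //	metadata:
        //	  managedFields:
        //	  - manager: capi-kubeadmcontrolplane
        //	    operation: Apply
        //	    apiVersion: cluster.x-k8s.io/v1beta1
        //	    fieldsType: FieldsV1
        //	    fieldsV1:
        //	      f:metadata:
        //	        f:annotations: {}
        //	        f:labels: {}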
   678  
   679  // reconcileControlPlaneConditions is responsible for reconciling conditions reporting the status of static pods and
   680  // the status of the etcd cluster.
   681  func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) error {
   682  	// If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information
   683  	// for updating conditions. Return early.
   684  	if !controlPlane.KCP.Status.Initialized {
   685  		return nil
   686  	}
   687  
   688  	workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
   689  	if err != nil {
   690  		return errors.Wrap(err, "cannot get remote client to workload cluster")
   691  	}
   692  
   693  	// Update conditions status
   694  	workloadCluster.UpdateStaticPodConditions(ctx, controlPlane)
   695  	workloadCluster.UpdateEtcdConditions(ctx, controlPlane)
   696  
   697  	// Patch machines with the updated conditions.
   698  	if err := controlPlane.PatchMachines(ctx); err != nil {
   699  		return err
   700  	}
   701  
   702  	// KCP will be patched at the end of Reconcile to reflect updated conditions, so we can return now.
   703  	return nil
   704  }
   705  
   706  // reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes.
   707  // This is usually required after a machine deletion.
   708  //
   709  // NOTE: this func uses KCP conditions, so reconcileControlPlaneConditions must be called before this.
   710  func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) error {
   711  	log := ctrl.LoggerFrom(ctx)
   712  
   713  	// If etcd is not managed by KCP this is a no-op.
   714  	if !controlPlane.IsEtcdManaged() {
   715  		return nil
   716  	}
   717  
   718  	// If there are no KCP-owned control-plane machines, then the control plane has not been initialized yet.
   719  	if controlPlane.Machines.Len() == 0 {
   720  		return nil
   721  	}
   722  
   723  	// Collect all the node names.
   724  	nodeNames := []string{}
   725  	for _, machine := range controlPlane.Machines {
   726  		if machine.Status.NodeRef == nil {
   727  			// If there are provisioning machines (machines without a node yet), return.
   728  			return nil
   729  		}
   730  		nodeNames = append(nodeNames, machine.Status.NodeRef.Name)
   731  	}
   732  
   733  	// Potential inconsistencies between the list of members and the list of machines/nodes are
   734  	// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exist, return early.
   735  	if conditions.IsTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition) {
   736  		return nil
   737  	}
   738  
   739  	workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
   740  	if err != nil {
   741  		// Failing at connecting to the workload cluster can mean workload cluster is unhealthy for a variety of reasons such as etcd quorum loss.
   742  		return errors.Wrap(err, "cannot get remote client to workload cluster")
   743  	}
   744  
   745  	parsedVersion, err := semver.ParseTolerant(controlPlane.KCP.Spec.Version)
   746  	if err != nil {
   747  		return errors.Wrapf(err, "failed to parse kubernetes version %q", controlPlane.KCP.Spec.Version)
   748  	}
   749  
   750  	removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames, parsedVersion)
   751  	if err != nil {
   752  		return errors.Wrap(err, "failed attempt to reconcile etcd members")
   753  	}
   754  
   755  	if len(removedMembers) > 0 {
   756  		log.Info("Etcd members without nodes removed from the cluster", "members", removedMembers)
   757  	}
   758  
   759  	return nil
   760  }
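
        // Illustrative outcome of the member reconciliation above (a sketch; names are made up): if
        // the Machines' NodeRefs are ["cp-1", "cp-2"] but etcd still lists a member for an already
        // deleted node "cp-3", ReconcileEtcdMembers removes that orphaned member and the removal is
        // reported via the "Etcd members without nodes removed from the cluster" log line.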
   761  
   762  func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context.Context, controlPlane *internal.ControlPlane) error {
   763  	log := ctrl.LoggerFrom(ctx)
   764  
   765  	// Return if there are no KCP-owned control-plane machines.
   766  	if controlPlane.Machines.Len() == 0 {
   767  		return nil
   768  	}
   769  
   770  	// Return if KCP is not yet initialized (no API server to contact for checking certificate expiration).
   771  	if !controlPlane.KCP.Status.Initialized {
   772  		return nil
   773  	}
   774  
   775  	// Ignore machines which are being deleted.
   776  	machines := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
   777  
   778  	workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
   779  	if err != nil {
   780  		return errors.Wrap(err, "failed to reconcile certificate expiries: cannot get remote client to workload cluster")
   781  	}
   782  
   783  	for _, m := range machines {
   784  		log := log.WithValues("Machine", klog.KObj(m))
   785  
   786  		kubeadmConfig, ok := controlPlane.GetKubeadmConfig(m.Name)
   787  		if !ok {
   788  			// Skip if the Machine doesn't have a KubeadmConfig.
   789  			continue
   790  		}
   791  
   792  		annotations := kubeadmConfig.GetAnnotations()
   793  		if _, ok := annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]; ok {
   794  			// Skip if annotation is already set.
   795  			continue
   796  		}
   797  
   798  		if m.Status.NodeRef == nil {
   799  			// Skip if the Machine is still provisioning.
   800  			continue
   801  		}
   802  		nodeName := m.Status.NodeRef.Name
   803  		log = log.WithValues("Node", klog.KRef("", nodeName))
   804  
   805  		log.V(3).Info("Reconciling certificate expiry")
   806  		certificateExpiry, err := workloadCluster.GetAPIServerCertificateExpiry(ctx, kubeadmConfig, nodeName)
   807  		if err != nil {
   808  			return errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s", m.Name)
   809  		}
   810  		expiry := certificateExpiry.Format(time.RFC3339)
   811  
   812  		log.V(2).Info(fmt.Sprintf("Setting certificate expiry to %s", expiry))
   813  		patchHelper, err := patch.NewHelper(kubeadmConfig, r.Client)
   814  		if err != nil {
   815  			return errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s", m.Name)
   816  		}
   817  
   818  		if annotations == nil {
   819  			annotations = map[string]string{}
   820  		}
   821  		annotations[clusterv1.MachineCertificatesExpiryDateAnnotation] = expiry
   822  		kubeadmConfig.SetAnnotations(annotations)
   823  
   824  		if err := patchHelper.Patch(ctx, kubeadmConfig); err != nil {
   825  			return errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s", m.Name)
   826  		}
   827  	}
   828  
   829  	return nil
   830  }
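
        // Illustrative result of the reconciliation above (a sketch; the timestamp is made up and the
        // annotation key is shown as the value clusterv1.MachineCertificatesExpiryDateAnnotation
        // resolves to at the time of writing): each KubeadmConfig of a provisioned control plane
        // Machine ends up with an RFC3339 expiry annotation, e.g.:
        //
        //	metadata:
        //	  annotations:
        //	    machine.cluster.x-k8s.io/certificates-expiry: "2025-11-02T15:04:05Z"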
   831  
   832  func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines, cluster *clusterv1.Cluster) error {
   833  	// We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned
   834  	// See https://github.com/kubernetes/kubernetes/issues/42639
   835  	uncached := controlplanev1.KubeadmControlPlane{}
   836  	err := r.managementClusterUncached.Get(ctx, client.ObjectKey{Namespace: kcp.Namespace, Name: kcp.Name}, &uncached)
   837  	if err != nil {
   838  		return errors.Wrapf(err, "failed to check whether %v/%v was deleted before adoption", kcp.GetNamespace(), kcp.GetName())
   839  	}
   840  	if !uncached.DeletionTimestamp.IsZero() {
   841  		return errors.Errorf("%v/%v has just been deleted at %v", kcp.GetNamespace(), kcp.GetName(), kcp.GetDeletionTimestamp())
   842  	}
   843  
   844  	kcpVersion, err := semver.ParseTolerant(kcp.Spec.Version)
   845  	if err != nil {
   846  		return errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version)
   847  	}
   848  
   849  	for _, m := range machines {
   850  		ref := m.Spec.Bootstrap.ConfigRef
   851  
   852  		// TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines
   853  		if ref == nil || ref.Kind != "KubeadmConfig" {
   854  			return errors.Errorf("unable to adopt Machine %v/%v: expected a ConfigRef of kind KubeadmConfig but instead found %v", m.Namespace, m.Name, ref)
   855  		}
   856  
   857  		// TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines
   858  		if ref.Namespace != "" && ref.Namespace != kcp.Namespace {
   859  			return errors.Errorf("could not adopt resources from KubeadmConfig %v/%v: cannot adopt across namespaces", ref.Namespace, ref.Name)
   860  		}
   861  
   862  		if m.Spec.Version == nil {
   863  			// if the machine's version is not immediately apparent, assume the operator knows what they're doing
   864  			continue
   865  		}
   866  
   867  		machineVersion, err := semver.ParseTolerant(*m.Spec.Version)
   868  		if err != nil {
   869  			return errors.Wrapf(err, "failed to parse kubernetes version %q", *m.Spec.Version)
   870  		}
   871  
   872  		if !util.IsSupportedVersionSkew(kcpVersion, machineVersion) {
   873  			r.recorder.Eventf(kcp, corev1.EventTypeWarning, "AdoptionFailed", "Could not adopt Machine %s/%s: its version (%q) is outside supported +/- one minor version skew from KCP's (%q)", m.Namespace, m.Name, *m.Spec.Version, kcp.Spec.Version)
   874  			// avoid returning an error here so we don't cause the KCP controller to spin until the operator clarifies their intent
   875  			return nil
   876  		}
   877  	}
   878  
   879  	for _, m := range machines {
   880  		ref := m.Spec.Bootstrap.ConfigRef
   881  		cfg := &bootstrapv1.KubeadmConfig{}
   882  
   883  		if err := r.Client.Get(ctx, client.ObjectKey{Name: ref.Name, Namespace: kcp.Namespace}, cfg); err != nil {
   884  			return err
   885  		}
   886  
   887  		if err := r.adoptOwnedSecrets(ctx, kcp, cfg, cluster.Name); err != nil {
   888  			return err
   889  		}
   890  
   891  		patchHelper, err := patch.NewHelper(m, r.Client)
   892  		if err != nil {
   893  			return err
   894  		}
   895  
   896  		if err := controllerutil.SetControllerReference(kcp, m, r.Client.Scheme()); err != nil {
   897  			return err
   898  		}
   899  
   900  		// Note that ValidateOwnerReferences() will reject this patch if another
   901  		// OwnerReference exists with controller=true.
   902  		if err := patchHelper.Patch(ctx, m); err != nil {
   903  			return err
   904  		}
   905  	}
   906  	return nil
   907  }
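
        // Illustrative version-skew check during adoption (a sketch; versions are made up): with the
        // KCP at spec.version "v1.29.3", standalone Machines at "v1.28.9", "v1.29.0" or "v1.30.1" are
        // within the +/- one minor version skew accepted above and can be adopted, while a Machine at
        // "v1.27.5" triggers the AdoptionFailed event and stops the adoption pass without error.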
   908  
   909  func (r *KubeadmControlPlaneReconciler) adoptOwnedSecrets(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, currentOwner *bootstrapv1.KubeadmConfig, clusterName string) error {
   910  	secrets := corev1.SecretList{}
   911  	if err := r.Client.List(ctx, &secrets, client.InNamespace(kcp.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}); err != nil {
   912  		return errors.Wrap(err, "error finding secrets for adoption")
   913  	}
   914  
   915  	for i := range secrets.Items {
   916  		s := secrets.Items[i]
   917  		if !util.IsOwnedByObject(&s, currentOwner) {
   918  			continue
   919  		}
   920  		// avoid taking ownership of the bootstrap data secret
   921  		if currentOwner.Status.DataSecretName != nil && s.Name == *currentOwner.Status.DataSecretName {
   922  			continue
   923  		}
   924  
   925  		ss := s.DeepCopy()
   926  
   927  		ss.SetOwnerReferences(util.ReplaceOwnerRef(ss.GetOwnerReferences(), currentOwner, metav1.OwnerReference{
   928  			APIVersion:         controlplanev1.GroupVersion.String(),
   929  			Kind:               "KubeadmControlPlane",
   930  			Name:               kcp.Name,
   931  			UID:                kcp.UID,
   932  			Controller:         ptr.To(true),
   933  			BlockOwnerDeletion: ptr.To(true),
   934  		}))
   935  
   936  		if err := r.Client.Update(ctx, ss); err != nil {
   937  			return errors.Wrapf(err, "error changing secret %v ownership from KubeadmConfig/%v to KubeadmControlPlane/%v", s.Name, currentOwner.GetName(), kcp.Name)
   938  		}
   939  	}
   940  
   941  	return nil
   942  }
   943  
   944  // ensureCertificatesOwnerRef ensures an ownerReference to the owner is added on the Secrets holding certificates.
   945  func (r *KubeadmControlPlaneReconciler) ensureCertificatesOwnerRef(ctx context.Context, certificates secret.Certificates, owner metav1.OwnerReference) error {
   946  	for _, c := range certificates {
   947  		if c.Secret == nil {
   948  			continue
   949  		}
   950  
   951  		patchHelper, err := patch.NewHelper(c.Secret, r.Client)
   952  		if err != nil {
   953  			return err
   954  		}
   955  
   956  		controller := metav1.GetControllerOf(c.Secret)
   957  		// If the current controller is KCP, ensure the owner reference is up to date.
   958  		// Note: This ensures secrets created prior to v1alpha4 are updated to have the correct owner reference apiVersion.
   959  		if controller != nil && controller.Kind == kubeadmControlPlaneKind {
   960  			c.Secret.SetOwnerReferences(util.EnsureOwnerRef(c.Secret.GetOwnerReferences(), owner))
   961  		}
   962  
   963  		// If the Type doesn't match the type used for secrets created by core components, continue without altering the owner reference further.
   964  		// Note: This ensures that control plane related secrets created by KubeadmConfig are eventually owned by KCP.
   965  		// TODO: Remove this logic once standalone control plane machines are no longer allowed.
   966  		if c.Secret.Type == clusterv1.ClusterSecretType {
   967  			// Remove the current controller if one exists.
   968  			if controller != nil {
   969  				c.Secret.SetOwnerReferences(util.RemoveOwnerRef(c.Secret.GetOwnerReferences(), *controller))
   970  			}
   971  			c.Secret.SetOwnerReferences(util.EnsureOwnerRef(c.Secret.GetOwnerReferences(), owner))
   972  		}
   973  		if err := patchHelper.Patch(ctx, c.Secret); err != nil {
   974  			return errors.Wrapf(err, "failed to set ownerReference")
   975  		}
   976  	}
   977  	return nil
   978  }