sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/control_plane.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package internal
    18  
    19  import (
    20  	"context"
    21  
    22  	"github.com/pkg/errors"
    23  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    24  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    25  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    26  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    27  	"sigs.k8s.io/controller-runtime/pkg/client"
    28  
    29  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    30  	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
    31  	"sigs.k8s.io/cluster-api/controllers/external"
    32  	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
    33  	"sigs.k8s.io/cluster-api/util/collections"
    34  	"sigs.k8s.io/cluster-api/util/failuredomains"
    35  	"sigs.k8s.io/cluster-api/util/patch"
    36  )
    37  
// ControlPlane holds business logic around control planes.
// It should never need to connect to a service, that responsibility lies outside of this struct.
// Going forward we should be trying to add more logic to here and reduce the amount of logic in the reconciler.
type ControlPlane struct {
	// KCP is the KubeadmControlPlane object being reconciled.
	KCP *controlplanev1.KubeadmControlPlane
	// Cluster is the Cluster that owns this control plane.
	Cluster *clusterv1.Cluster
	// Machines is the set of control plane Machines owned by KCP.
	Machines collections.Machines
	// machinesPatchHelpers holds one patch helper per Machine, keyed by Machine name;
	// populated in NewControlPlane and used by PatchMachines.
	machinesPatchHelpers map[string]*patch.Helper

	// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations
	reconciliationTime metav1.Time

	// TODO: we should see if we can combine these with the Machine objects so we don't have all these separate lookups
	// See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405
	// KubeadmConfigs maps Machine name -> its KubeadmConfig (entries may be missing if the config was not found).
	KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig
	// InfraResources maps Machine name -> its infrastructure resource (entries may be missing if not found).
	InfraResources map[string]*unstructured.Unstructured

	// managementCluster provides access to the workload cluster; kept private so callers
	// go through GetWorkloadCluster, which caches the result in workloadCluster for the reconcile.
	managementCluster ManagementCluster
	workloadCluster   WorkloadCluster
}
    58  
    59  // NewControlPlane returns an instantiated ControlPlane.
    60  func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
    61  	infraObjects, err := getInfraResources(ctx, client, ownedMachines)
    62  	if err != nil {
    63  		return nil, err
    64  	}
    65  	kubeadmConfigs, err := getKubeadmConfigs(ctx, client, ownedMachines)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	patchHelpers := map[string]*patch.Helper{}
    70  	for _, machine := range ownedMachines {
    71  		patchHelper, err := patch.NewHelper(machine, client)
    72  		if err != nil {
    73  			return nil, err
    74  		}
    75  		patchHelpers[machine.Name] = patchHelper
    76  	}
    77  
    78  	return &ControlPlane{
    79  		KCP:                  kcp,
    80  		Cluster:              cluster,
    81  		Machines:             ownedMachines,
    82  		machinesPatchHelpers: patchHelpers,
    83  		KubeadmConfigs:       kubeadmConfigs,
    84  		InfraResources:       infraObjects,
    85  		reconciliationTime:   metav1.Now(),
    86  		managementCluster:    managementCluster,
    87  	}, nil
    88  }
    89  
    90  // FailureDomains returns a slice of failure domain objects synced from the infrastructure provider into Cluster.Status.
    91  func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
    92  	if c.Cluster.Status.FailureDomains == nil {
    93  		return clusterv1.FailureDomains{}
    94  	}
    95  	return c.Cluster.Status.FailureDomains
    96  }
    97  
    98  // MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it.
    99  func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) (*clusterv1.Machine, error) {
   100  	fd := c.FailureDomainWithMostMachines(ctx, machines)
   101  	machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
   102  	machineToMark := machinesInFailureDomain.Oldest()
   103  	if machineToMark == nil {
   104  		return nil, errors.New("failed to pick control plane Machine to mark for deletion")
   105  	}
   106  	return machineToMark, nil
   107  }
   108  
   109  // MachineWithDeleteAnnotation returns a machine that has been annotated with DeleteMachineAnnotation key.
   110  func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines) collections.Machines {
   111  	// See if there are any machines with DeleteMachineAnnotation key.
   112  	annotatedMachines := machines.Filter(collections.HasAnnotationKey(clusterv1.DeleteMachineAnnotation))
   113  	// If there are, return list of annotated machines.
   114  	return annotatedMachines
   115  }
   116  
   117  // FailureDomainWithMostMachines returns a fd which exists both in machines and control-plane machines and has the most
   118  // control-plane machines on it.
   119  func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) *string {
   120  	// See if there are any Machines that are not in currently defined failure domains first.
   121  	notInFailureDomains := machines.Filter(
   122  		collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
   123  	)
   124  	if len(notInFailureDomains) > 0 {
   125  		// return the failure domain for the oldest Machine not in the current list of failure domains
   126  		// this could be either nil (no failure domain defined) or a failure domain that is no longer defined
   127  		// in the cluster status.
   128  		return notInFailureDomains.Oldest().Spec.FailureDomain
   129  	}
   130  	return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
   131  }
   132  
   133  // NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines.
   134  func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) *string {
   135  	if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
   136  		return nil
   137  	}
   138  	return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines())
   139  }
   140  
   141  // InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
   142  func (c *ControlPlane) InitialControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
   143  	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
   144  	bootstrapSpec.JoinConfiguration = nil
   145  	return bootstrapSpec
   146  }
   147  
   148  // JoinControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for joining control planes.
   149  func (c *ControlPlane) JoinControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
   150  	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
   151  	bootstrapSpec.InitConfiguration = nil
   152  	// NOTE: For the joining we are preserving the ClusterConfiguration in order to determine if the
   153  	// cluster is using an external etcd in the kubeadm bootstrap provider (even if this is not required by kubeadm Join).
   154  	// TODO: Determine if this copy of cluster configuration can be used for rollouts (thus allowing to remove the annotation at machine level)
   155  	return bootstrapSpec
   156  }
   157  
   158  // HasDeletingMachine returns true if any machine in the control plane is in the process of being deleted.
   159  func (c *ControlPlane) HasDeletingMachine() bool {
   160  	return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
   161  }
   162  
   163  // GetKubeadmConfig returns the KubeadmConfig of a given machine.
   164  func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) {
   165  	kubeadmConfig, ok := c.KubeadmConfigs[machineName]
   166  	return kubeadmConfig, ok
   167  }
   168  
   169  // MachinesNeedingRollout return a list of machines that need to be rolled out.
   170  func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string) {
   171  	// Ignore machines to be deleted.
   172  	machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
   173  
   174  	// Return machines if they are scheduled for rollout or if with an outdated configuration.
   175  	machinesNeedingRollout := make(collections.Machines, len(machines))
   176  	rolloutReasons := map[string]string{}
   177  	for _, m := range machines {
   178  		reason, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
   179  		if needsRollout {
   180  			machinesNeedingRollout.Insert(m)
   181  			rolloutReasons[m.Name] = reason
   182  		}
   183  	}
   184  	return machinesNeedingRollout, rolloutReasons
   185  }
   186  
   187  // UpToDateMachines returns the machines that are up to date with the control
   188  // plane's configuration and therefore do not require rollout.
   189  func (c *ControlPlane) UpToDateMachines() collections.Machines {
   190  	upToDateMachines := make(collections.Machines, len(c.Machines))
   191  	for _, m := range c.Machines {
   192  		_, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
   193  		if !needsRollout {
   194  			upToDateMachines.Insert(m)
   195  		}
   196  	}
   197  	return upToDateMachines
   198  }
   199  
   200  // getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.
   201  func getInfraResources(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*unstructured.Unstructured, error) {
   202  	result := map[string]*unstructured.Unstructured{}
   203  	for _, m := range machines {
   204  		infraObj, err := external.Get(ctx, cl, &m.Spec.InfrastructureRef, m.Namespace)
   205  		if err != nil {
   206  			if apierrors.IsNotFound(errors.Cause(err)) {
   207  				continue
   208  			}
   209  			return nil, errors.Wrapf(err, "failed to retrieve infra obj for machine %q", m.Name)
   210  		}
   211  		result[m.Name] = infraObj
   212  	}
   213  	return result, nil
   214  }
   215  
   216  // getKubeadmConfigs fetches the kubeadm config for each machine in the collection and returns a map of machine.Name -> KubeadmConfig.
   217  func getKubeadmConfigs(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*bootstrapv1.KubeadmConfig, error) {
   218  	result := map[string]*bootstrapv1.KubeadmConfig{}
   219  	for _, m := range machines {
   220  		bootstrapRef := m.Spec.Bootstrap.ConfigRef
   221  		if bootstrapRef == nil {
   222  			continue
   223  		}
   224  		machineConfig := &bootstrapv1.KubeadmConfig{}
   225  		if err := cl.Get(ctx, client.ObjectKey{Name: bootstrapRef.Name, Namespace: m.Namespace}, machineConfig); err != nil {
   226  			if apierrors.IsNotFound(errors.Cause(err)) {
   227  				continue
   228  			}
   229  			return nil, errors.Wrapf(err, "failed to retrieve bootstrap config for machine %q", m.Name)
   230  		}
   231  		result[m.Name] = machineConfig
   232  	}
   233  	return result, nil
   234  }
   235  
   236  // IsEtcdManaged returns true if the control plane relies on a managed etcd.
   237  func (c *ControlPlane) IsEtcdManaged() bool {
   238  	return c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration == nil || c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External == nil
   239  }
   240  
   241  // UnhealthyMachinesWithUnhealthyControlPlaneComponents returns all unhealthy control plane machines that
   242  // have unhealthy control plane components.
   243  // It differs from UnhealthyMachinesByHealthCheck which checks `MachineHealthCheck` conditions.
   244  func (c *ControlPlane) UnhealthyMachinesWithUnhealthyControlPlaneComponents(machines collections.Machines) collections.Machines {
   245  	return machines.Filter(collections.HasUnhealthyControlPlaneComponents(c.IsEtcdManaged()))
   246  }
   247  
   248  // UnhealthyMachinesByMachineHealthCheck returns the list of control plane machines marked as unhealthy by Machine Health Check.
   249  func (c *ControlPlane) UnhealthyMachinesByMachineHealthCheck() collections.Machines {
   250  	return c.Machines.Filter(collections.HasUnhealthyCondition)
   251  }
   252  
   253  // HealthyMachinesByMachineHealthCheck returns the list of control plane machines not marked as unhealthy by Machine Health Check.
   254  func (c *ControlPlane) HealthyMachinesByMachineHealthCheck() collections.Machines {
   255  	return c.Machines.Filter(collections.Not(collections.HasUnhealthyCondition))
   256  }
   257  
   258  // HasUnhealthyMachineByMachineHealthCheck returns true if any machine in the control plane is marked as unhealthy by Machine Health Check.
   259  func (c *ControlPlane) HasUnhealthyMachineByMachineHealthCheck() bool {
   260  	return len(c.UnhealthyMachinesByMachineHealthCheck()) > 0
   261  }
   262  
   263  // HasHealthyMachineStillProvisioning returns true if any healthy machine in the control plane is still in the process of being provisioned.
   264  func (c *ControlPlane) HasHealthyMachineStillProvisioning() bool {
   265  	return len(c.HealthyMachinesByMachineHealthCheck().Filter(collections.Not(collections.HasNode()))) > 0
   266  }
   267  
   268  // PatchMachines patches all the machines conditions.
   269  func (c *ControlPlane) PatchMachines(ctx context.Context) error {
   270  	errList := []error{}
   271  	for i := range c.Machines {
   272  		machine := c.Machines[i]
   273  		if helper, ok := c.machinesPatchHelpers[machine.Name]; ok {
   274  			if err := helper.Patch(ctx, machine, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   275  				controlplanev1.MachineAPIServerPodHealthyCondition,
   276  				controlplanev1.MachineControllerManagerPodHealthyCondition,
   277  				controlplanev1.MachineSchedulerPodHealthyCondition,
   278  				controlplanev1.MachineEtcdPodHealthyCondition,
   279  				controlplanev1.MachineEtcdMemberHealthyCondition,
   280  			}}); err != nil {
   281  				errList = append(errList, err)
   282  			}
   283  			continue
   284  		}
   285  		errList = append(errList, errors.Errorf("failed to get patch helper for machine %s", machine.Name))
   286  	}
   287  	return kerrors.NewAggregate(errList)
   288  }
   289  
   290  // SetPatchHelpers updates the patch helpers.
   291  func (c *ControlPlane) SetPatchHelpers(patchHelpers map[string]*patch.Helper) {
   292  	if c.machinesPatchHelpers == nil {
   293  		c.machinesPatchHelpers = map[string]*patch.Helper{}
   294  	}
   295  	for machineName, patchHelper := range patchHelpers {
   296  		c.machinesPatchHelpers[machineName] = patchHelper
   297  	}
   298  }
   299  
   300  // GetWorkloadCluster builds a cluster object.
   301  // The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
   302  func (c *ControlPlane) GetWorkloadCluster(ctx context.Context) (WorkloadCluster, error) {
   303  	if c.workloadCluster != nil {
   304  		return c.workloadCluster, nil
   305  	}
   306  
   307  	workloadCluster, err := c.managementCluster.GetWorkloadCluster(ctx, client.ObjectKeyFromObject(c.Cluster))
   308  	if err != nil {
   309  		return nil, err
   310  	}
   311  	c.workloadCluster = workloadCluster
   312  	return c.workloadCluster, nil
   313  }
   314  
   315  // InjectTestManagementCluster allows to inject a test ManagementCluster during tests.
   316  // NOTE: This approach allows to keep the managementCluster field private, which will
   317  // prevent people from using managementCluster.GetWorkloadCluster because it creates a new
   318  // instance of WorkloadCluster at every call. People instead should use ControlPlane.GetWorkloadCluster
   319  // that creates only a single instance of WorkloadCluster for each reconcile.
   320  func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementCluster) {
   321  	c.managementCluster = managementCluster
   322  	c.workloadCluster = nil
   323  }