sigs.k8s.io/cluster-api@v1.6.3/controlplane/kubeadm/internal/control_plane.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package internal

import (
	"context"

	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/controllers/external"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/failuredomains"
	"sigs.k8s.io/cluster-api/util/patch"
)

// ControlPlane holds business logic around control planes.
// It should never need to connect to a service; that responsibility lies outside of this struct.
// Going forward we should be trying to add more logic here and reduce the amount of logic in the reconciler.
type ControlPlane struct {
	KCP                  *controlplanev1.KubeadmControlPlane
	Cluster              *clusterv1.Cluster
	Machines             collections.Machines
	machinesPatchHelpers map[string]*patch.Helper

	// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations.
	reconciliationTime metav1.Time

	// TODO: we should see if we can combine these with the Machine objects so we don't have all these separate lookups.
	// See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405
	KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig
	InfraResources map[string]*unstructured.Unstructured

	managementCluster ManagementCluster
	workloadCluster   WorkloadCluster
}

// NewControlPlane returns an instantiated ControlPlane.
func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
	infraObjects, err := getInfraResources(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	kubeadmConfigs, err := getKubeadmConfigs(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	patchHelpers := map[string]*patch.Helper{}
	for _, machine := range ownedMachines {
		patchHelper, err := patch.NewHelper(machine, client)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create patch helper for machine %s", machine.Name)
		}
		patchHelpers[machine.Name] = patchHelper
	}

	return &ControlPlane{
		KCP:                  kcp,
		Cluster:              cluster,
		Machines:             ownedMachines,
		machinesPatchHelpers: patchHelpers,
		KubeadmConfigs:       kubeadmConfigs,
		InfraResources:       infraObjects,
		reconciliationTime:   metav1.Now(),
		managementCluster:    managementCluster,
	}, nil
}
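
// A minimal usage sketch (not part of the upstream file): a reconciler would typically
// build the ControlPlane once per reconcile and pass it around. The receiver fields
// r.managementCluster and r.Client below are illustrative assumptions, not names defined
// in this package.
//
//	controlPlane, err := NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, ownedMachines)
//	if err != nil {
//		return ctrl.Result{}, err
//	}
//	if controlPlane.HasDeletingMachine() {
//		// Wait for the deletion to complete before making further changes.
//	}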

// FailureDomains returns the failure domains synced from the infrastructure provider into Cluster.Status.
func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
	if c.Cluster.Status.FailureDomains == nil {
		return clusterv1.FailureDomains{}
	}
	return c.Cluster.Status.FailureDomains
}

// MachineInFailureDomainWithMostMachines returns the oldest machine in the failure domain that has the most control-plane machines on it.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(machines collections.Machines) (*clusterv1.Machine, error) {
	fd := c.FailureDomainWithMostMachines(machines)
	machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
	machineToMark := machinesInFailureDomain.Oldest()
	if machineToMark == nil {
		return nil, errors.New("failed to pick control plane Machine to mark for deletion")
	}
	return machineToMark, nil
}

// MachineWithDeleteAnnotation returns the machines that have been annotated with the DeleteMachineAnnotation key.
func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines) collections.Machines {
	// See if there are any machines with the DeleteMachineAnnotation key.
	annotatedMachines := machines.Filter(collections.HasAnnotationKey(clusterv1.DeleteMachineAnnotation))
	// If there are, return the list of annotated machines.
	return annotatedMachines
}

// FailureDomainWithMostMachines returns the failure domain that exists in both the given machines and the control-plane
// machines and has the most control-plane machines on it.
func (c *ControlPlane) FailureDomainWithMostMachines(machines collections.Machines) *string {
	// See if there are any Machines that are not in currently defined failure domains first.
	notInFailureDomains := machines.Filter(
		collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
	)
	if len(notInFailureDomains) > 0 {
		// Return the failure domain for the oldest Machine not in the current list of failure domains.
		// This could be either nil (no failure domain defined) or a failure domain that is no longer defined
		// in the cluster status.
		return notInFailureDomains.Oldest().Spec.FailureDomain
	}
	return failuredomains.PickMost(c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
}

// NextFailureDomainForScaleUp returns the failure domain with the fewest up-to-date machines.
func (c *ControlPlane) NextFailureDomainForScaleUp() *string {
	if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
		return nil
	}
	return failuredomains.PickFewest(c.FailureDomains().FilterControlPlane(), c.UpToDateMachines())
}
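
// Illustrative example (an assumption about typical spreading behaviour, not additional API):
// with control-plane failure domains "a", "b", "c" and up-to-date machines spread as
// a=2, b=1, c=1, a scale-up would target "b" or "c" (fewest machines), while
// FailureDomainWithMostMachines would typically return "a" for scale-down.
//
//	// Hypothetical snippet inside a scale-up path:
//	fd := controlPlane.NextFailureDomainForScaleUp()
//	// fd is nil when the cluster reports no control-plane failure domains.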

// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
func (c *ControlPlane) InitialControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
	bootstrapSpec.JoinConfiguration = nil
	return bootstrapSpec
}

// JoinControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for joining control planes.
func (c *ControlPlane) JoinControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
	bootstrapSpec.InitConfiguration = nil
	// NOTE: When joining, the ClusterConfiguration is preserved so that the kubeadm bootstrap provider can determine
	// whether the cluster is using an external etcd (even though this is not required by kubeadm join).
	// TODO: Determine if this copy of the cluster configuration can be used for rollouts (thus allowing us to remove the annotation at the machine level).
	return bootstrapSpec
}
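
// Sketch of how the two specs above differ (illustrative only): both are deep copies of
// KCP.Spec.KubeadmConfigSpec, so the ClusterConfiguration is kept in both, while only the
// part relevant to the bootstrap phase survives.
//
//	initSpec := controlPlane.InitialControlPlaneConfig() // initSpec.JoinConfiguration == nil
//	joinSpec := controlPlane.JoinControlPlaneConfig()    // joinSpec.InitConfiguration == nil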

// HasDeletingMachine returns true if any machine in the control plane is in the process of being deleted.
func (c *ControlPlane) HasDeletingMachine() bool {
	return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
}

// GetKubeadmConfig returns the KubeadmConfig of a given machine.
func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) {
	kubeadmConfig, ok := c.KubeadmConfigs[machineName]
	return kubeadmConfig, ok
}

// MachinesNeedingRollout returns the machines that need to be rolled out, together with the per-machine rollout reason.
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string) {
	// Ignore machines to be deleted.
	machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))

	// Return machines that are scheduled for rollout or whose configuration is outdated.
	machinesNeedingRollout := make(collections.Machines, len(machines))
	rolloutReasons := map[string]string{}
	for _, m := range machines {
		reason, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
		if needsRollout {
			machinesNeedingRollout.Insert(m)
			rolloutReasons[m.Name] = reason
		}
	}
	return machinesNeedingRollout, rolloutReasons
}
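
// A hedged usage sketch: callers typically log the per-machine reasons and decide whether an
// upgrade is still in progress. The log variable below is an assumption for illustration.
//
//	needRollout, rolloutReasons := controlPlane.MachinesNeedingRollout()
//	for name, reason := range rolloutReasons {
//		log.Info("Machine needs rollout", "machine", name, "reason", reason)
//	}
//	if len(needRollout) > 0 {
//		// Roll out outdated machines one at a time.
//	}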

// UpToDateMachines returns the machines that are up to date with the control
// plane's configuration and therefore do not require rollout.
func (c *ControlPlane) UpToDateMachines() collections.Machines {
	upToDateMachines := make(collections.Machines, len(c.Machines))
	for _, m := range c.Machines {
		_, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
		if !needsRollout {
			upToDateMachines.Insert(m)
		}
	}
	return upToDateMachines
}

// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.
func getInfraResources(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*unstructured.Unstructured, error) {
	result := map[string]*unstructured.Unstructured{}
	for _, m := range machines {
		infraObj, err := external.Get(ctx, cl, &m.Spec.InfrastructureRef, m.Namespace)
		if err != nil {
			if apierrors.IsNotFound(errors.Cause(err)) {
				continue
			}
			return nil, errors.Wrapf(err, "failed to retrieve infra obj for machine %q", m.Name)
		}
		result[m.Name] = infraObj
	}
	return result, nil
}

// getKubeadmConfigs fetches the kubeadm config for each machine in the collection and returns a map of machine.Name -> KubeadmConfig.
func getKubeadmConfigs(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*bootstrapv1.KubeadmConfig, error) {
	result := map[string]*bootstrapv1.KubeadmConfig{}
	for _, m := range machines {
		bootstrapRef := m.Spec.Bootstrap.ConfigRef
		if bootstrapRef == nil {
			continue
		}
		machineConfig := &bootstrapv1.KubeadmConfig{}
		if err := cl.Get(ctx, client.ObjectKey{Name: bootstrapRef.Name, Namespace: m.Namespace}, machineConfig); err != nil {
			if apierrors.IsNotFound(errors.Cause(err)) {
				continue
			}
			return nil, errors.Wrapf(err, "failed to retrieve bootstrap config for machine %q", m.Name)
		}
		result[m.Name] = machineConfig
	}
	return result, nil
}

// IsEtcdManaged returns true if the control plane relies on a managed etcd.
func (c *ControlPlane) IsEtcdManaged() bool {
	return c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration == nil || c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External == nil
}
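
// Example (an assumption for illustration, not part of the upstream file): a KubeadmControlPlane
// that points kubeadm at an external etcd is considered unmanaged; everything else is managed.
//
//	kcp.Spec.KubeadmConfigSpec.ClusterConfiguration = &bootstrapv1.ClusterConfiguration{
//		Etcd: bootstrapv1.Etcd{
//			External: &bootstrapv1.ExternalEtcd{Endpoints: []string{"https://etcd.example.com:2379"}},
//		},
//	}
//	// With the spec above, controlPlane.IsEtcdManaged() returns false.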

// UnhealthyMachines returns the list of control plane machines marked as unhealthy by MHC.
func (c *ControlPlane) UnhealthyMachines() collections.Machines {
	return c.Machines.Filter(collections.HasUnhealthyCondition)
}

// HealthyMachines returns the list of control plane machines not marked as unhealthy by MHC.
func (c *ControlPlane) HealthyMachines() collections.Machines {
	return c.Machines.Filter(collections.Not(collections.HasUnhealthyCondition))
}

// HasUnhealthyMachine returns true if any machine in the control plane is marked as unhealthy by MHC.
func (c *ControlPlane) HasUnhealthyMachine() bool {
	return len(c.UnhealthyMachines()) > 0
}

// PatchMachines patches all the machines' conditions.
func (c *ControlPlane) PatchMachines(ctx context.Context) error {
	errList := []error{}
	for i := range c.Machines {
		machine := c.Machines[i]
		if helper, ok := c.machinesPatchHelpers[machine.Name]; ok {
			if err := helper.Patch(ctx, machine, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
				controlplanev1.MachineAPIServerPodHealthyCondition,
				controlplanev1.MachineControllerManagerPodHealthyCondition,
				controlplanev1.MachineSchedulerPodHealthyCondition,
				controlplanev1.MachineEtcdPodHealthyCondition,
				controlplanev1.MachineEtcdMemberHealthyCondition,
			}}); err != nil {
				errList = append(errList, errors.Wrapf(err, "failed to patch machine %s", machine.Name))
			}
			continue
		}
		errList = append(errList, errors.Errorf("failed to get patch helper for machine %s", machine.Name))
	}
	return kerrors.NewAggregate(errList)
}

// SetPatchHelpers updates the patch helpers.
func (c *ControlPlane) SetPatchHelpers(patchHelpers map[string]*patch.Helper) {
	if c.machinesPatchHelpers == nil {
		c.machinesPatchHelpers = map[string]*patch.Helper{}
	}
	for machineName, patchHelper := range patchHelpers {
		c.machinesPatchHelpers[machineName] = patchHelper
	}
}

// GetWorkloadCluster builds a cluster object.
// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
// The result is cached, so subsequent calls during the same reconcile return the same instance.
func (c *ControlPlane) GetWorkloadCluster(ctx context.Context) (WorkloadCluster, error) {
	if c.workloadCluster != nil {
		return c.workloadCluster, nil
	}

	workloadCluster, err := c.managementCluster.GetWorkloadCluster(ctx, client.ObjectKeyFromObject(c.Cluster))
	if err != nil {
		return nil, err
	}
	c.workloadCluster = workloadCluster
	return c.workloadCluster, nil
}

// InjectTestManagementCluster allows injecting a test ManagementCluster during tests.
// NOTE: This approach keeps the managementCluster field private, which prevents people from
// using managementCluster.GetWorkloadCluster directly, because it creates a new instance of
// WorkloadCluster at every call. People should instead use ControlPlane.GetWorkloadCluster,
// which creates only a single instance of WorkloadCluster per reconcile.
func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementCluster) {
	c.managementCluster = managementCluster
	c.workloadCluster = nil
}
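
// A minimal test sketch (names such as fakeManagementCluster are assumptions):
//
//	controlPlane := &ControlPlane{Cluster: cluster, KCP: kcp, Machines: machines}
//	controlPlane.InjectTestManagementCluster(fakeManagementCluster)
//	// Subsequent GetWorkloadCluster calls resolve through the injected fake, and the first
//	// result is cached on the ControlPlane for the rest of the reconcile.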