sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/control_plane.go

/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package internal

import (
	"context"

	"github.com/pkg/errors"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/controllers/external"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/failuredomains"
	"sigs.k8s.io/cluster-api/util/patch"
)

// ControlPlane holds business logic around control planes.
// It should never need to connect to a service; that responsibility lies outside of this struct.
// Going forward we should be trying to add more logic here and reduce the amount of logic in the reconciler.
type ControlPlane struct {
	KCP                  *controlplanev1.KubeadmControlPlane
	Cluster              *clusterv1.Cluster
	Machines             collections.Machines
	machinesPatchHelpers map[string]*patch.Helper

	// reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations.
	reconciliationTime metav1.Time

	// TODO: we should see if we can combine these with the Machine objects so we don't have all these separate lookups.
	// See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405
	KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig
	InfraResources map[string]*unstructured.Unstructured

	managementCluster ManagementCluster
	workloadCluster   WorkloadCluster
}

// NewControlPlane returns an instantiated ControlPlane.
func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, client client.Client, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, ownedMachines collections.Machines) (*ControlPlane, error) {
	infraObjects, err := getInfraResources(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	kubeadmConfigs, err := getKubeadmConfigs(ctx, client, ownedMachines)
	if err != nil {
		return nil, err
	}
	patchHelpers := map[string]*patch.Helper{}
	for _, machine := range ownedMachines {
		patchHelper, err := patch.NewHelper(machine, client)
		if err != nil {
			return nil, err
		}
		patchHelpers[machine.Name] = patchHelper
	}

	return &ControlPlane{
		KCP:                  kcp,
		Cluster:              cluster,
		Machines:             ownedMachines,
		machinesPatchHelpers: patchHelpers,
		KubeadmConfigs:       kubeadmConfigs,
		InfraResources:       infraObjects,
		reconciliationTime:   metav1.Now(),
		managementCluster:    managementCluster,
	}, nil
}

// FailureDomains returns the failure domain objects synced from the infrastructure provider into Cluster.Status.
func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
	if c.Cluster.Status.FailureDomains == nil {
		return clusterv1.FailureDomains{}
	}
	return c.Cluster.Status.FailureDomains
}

// MachineInFailureDomainWithMostMachines returns the oldest Machine in the failure domain that has the most control-plane machines on it.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) (*clusterv1.Machine, error) {
	fd := c.FailureDomainWithMostMachines(ctx, machines)
	machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
	machineToMark := machinesInFailureDomain.Oldest()
	if machineToMark == nil {
		return nil, errors.New("failed to pick control plane Machine to mark for deletion")
	}
	return machineToMark, nil
}

// MachineWithDeleteAnnotation returns the machines that have been annotated with the DeleteMachineAnnotation key.
func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines) collections.Machines {
	// See if there are any machines with the DeleteMachineAnnotation key.
	annotatedMachines := machines.Filter(collections.HasAnnotationKey(clusterv1.DeleteMachineAnnotation))
	// If there are, return the list of annotated machines.
	return annotatedMachines
}

// FailureDomainWithMostMachines returns the failure domain that exists both in the given machines and in the
// control-plane failure domains, and that has the most control-plane machines on it.
func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) *string {
	// See if there are any Machines that are not in the currently defined failure domains first.
	notInFailureDomains := machines.Filter(
		collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
	)
	if len(notInFailureDomains) > 0 {
		// Return the failure domain of the oldest Machine not in the current list of failure domains;
		// this could be either nil (no failure domain defined) or a failure domain that is no longer defined
		// in the cluster status.
		return notInFailureDomains.Oldest().Spec.FailureDomain
	}
	return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
}

// NextFailureDomainForScaleUp returns the failure domain with the fewest up-to-date machines.
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) *string {
	if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
		return nil
	}
	return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines())
}

// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
func (c *ControlPlane) InitialControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
	bootstrapSpec.JoinConfiguration = nil
	return bootstrapSpec
}

// JoinControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for joining control planes.
func (c *ControlPlane) JoinControlPlaneConfig() *bootstrapv1.KubeadmConfigSpec {
	bootstrapSpec := c.KCP.Spec.KubeadmConfigSpec.DeepCopy()
	bootstrapSpec.InitConfiguration = nil
	// NOTE: For joining we preserve the ClusterConfiguration in order to determine if the
	// cluster is using an external etcd in the kubeadm bootstrap provider (even if this is not required by kubeadm join).
	// TODO: Determine if this copy of the cluster configuration can be used for rollouts (thus allowing removal of the annotation at the machine level).
	return bootstrapSpec
}

// HasDeletingMachine returns true if any machine in the control plane is in the process of being deleted.
func (c *ControlPlane) HasDeletingMachine() bool {
	return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
}

// GetKubeadmConfig returns the KubeadmConfig of a given machine.
func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) {
	kubeadmConfig, ok := c.KubeadmConfigs[machineName]
	return kubeadmConfig, ok
}

// MachinesNeedingRollout returns a list of machines that need to be rolled out, together with the rollout reason for each machine.
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string) {
	// Ignore machines to be deleted.
	machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))

	// Return machines if they are scheduled for rollout or if their configuration is outdated.
	machinesNeedingRollout := make(collections.Machines, len(machines))
	rolloutReasons := map[string]string{}
	for _, m := range machines {
		reason, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
		if needsRollout {
			machinesNeedingRollout.Insert(m)
			rolloutReasons[m.Name] = reason
		}
	}
	return machinesNeedingRollout, rolloutReasons
}

// UpToDateMachines returns the machines that are up to date with the control
// plane's configuration and therefore do not require rollout.
func (c *ControlPlane) UpToDateMachines() collections.Machines {
	upToDateMachines := make(collections.Machines, len(c.Machines))
	for _, m := range c.Machines {
		_, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
		if !needsRollout {
			upToDateMachines.Insert(m)
		}
	}
	return upToDateMachines
}

// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.
func getInfraResources(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*unstructured.Unstructured, error) {
	result := map[string]*unstructured.Unstructured{}
	for _, m := range machines {
		infraObj, err := external.Get(ctx, cl, &m.Spec.InfrastructureRef, m.Namespace)
		if err != nil {
			if apierrors.IsNotFound(errors.Cause(err)) {
				continue
			}
			return nil, errors.Wrapf(err, "failed to retrieve infra obj for machine %q", m.Name)
		}
		result[m.Name] = infraObj
	}
	return result, nil
}

// getKubeadmConfigs fetches the kubeadm config for each machine in the collection and returns a map of machine.Name -> KubeadmConfig.
func getKubeadmConfigs(ctx context.Context, cl client.Client, machines collections.Machines) (map[string]*bootstrapv1.KubeadmConfig, error) {
	result := map[string]*bootstrapv1.KubeadmConfig{}
	for _, m := range machines {
		bootstrapRef := m.Spec.Bootstrap.ConfigRef
		if bootstrapRef == nil {
			continue
		}
		machineConfig := &bootstrapv1.KubeadmConfig{}
		if err := cl.Get(ctx, client.ObjectKey{Name: bootstrapRef.Name, Namespace: m.Namespace}, machineConfig); err != nil {
			if apierrors.IsNotFound(errors.Cause(err)) {
				continue
			}
			return nil, errors.Wrapf(err, "failed to retrieve bootstrap config for machine %q", m.Name)
		}
		result[m.Name] = machineConfig
	}
	return result, nil
}

// IsEtcdManaged returns true if the control plane relies on a managed etcd.
func (c *ControlPlane) IsEtcdManaged() bool {
	return c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration == nil || c.KCP.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External == nil
}

// UnhealthyMachinesWithUnhealthyControlPlaneComponents returns all unhealthy control plane machines that
// have unhealthy control plane components.
// It differs from UnhealthyMachinesByMachineHealthCheck, which checks `MachineHealthCheck` conditions.
func (c *ControlPlane) UnhealthyMachinesWithUnhealthyControlPlaneComponents(machines collections.Machines) collections.Machines {
	return machines.Filter(collections.HasUnhealthyControlPlaneComponents(c.IsEtcdManaged()))
}

// UnhealthyMachinesByMachineHealthCheck returns the list of control plane machines marked as unhealthy by Machine Health Check.
func (c *ControlPlane) UnhealthyMachinesByMachineHealthCheck() collections.Machines {
	return c.Machines.Filter(collections.HasUnhealthyCondition)
}

// HealthyMachinesByMachineHealthCheck returns the list of control plane machines not marked as unhealthy by Machine Health Check.
func (c *ControlPlane) HealthyMachinesByMachineHealthCheck() collections.Machines {
	return c.Machines.Filter(collections.Not(collections.HasUnhealthyCondition))
}

// HasUnhealthyMachineByMachineHealthCheck returns true if any machine in the control plane is marked as unhealthy by Machine Health Check.
func (c *ControlPlane) HasUnhealthyMachineByMachineHealthCheck() bool {
	return len(c.UnhealthyMachinesByMachineHealthCheck()) > 0
}

// HasHealthyMachineStillProvisioning returns true if any healthy machine in the control plane is still in the process of being provisioned.
func (c *ControlPlane) HasHealthyMachineStillProvisioning() bool {
	return len(c.HealthyMachinesByMachineHealthCheck().Filter(collections.Not(collections.HasNode()))) > 0
}

// PatchMachines patches the conditions of all the control plane machines.
func (c *ControlPlane) PatchMachines(ctx context.Context) error {
	errList := []error{}
	for i := range c.Machines {
		machine := c.Machines[i]
		if helper, ok := c.machinesPatchHelpers[machine.Name]; ok {
			if err := helper.Patch(ctx, machine, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
				controlplanev1.MachineAPIServerPodHealthyCondition,
				controlplanev1.MachineControllerManagerPodHealthyCondition,
				controlplanev1.MachineSchedulerPodHealthyCondition,
				controlplanev1.MachineEtcdPodHealthyCondition,
				controlplanev1.MachineEtcdMemberHealthyCondition,
			}}); err != nil {
				errList = append(errList, err)
			}
			continue
		}
		errList = append(errList, errors.Errorf("failed to get patch helper for machine %s", machine.Name))
	}
	return kerrors.NewAggregate(errList)
}

// SetPatchHelpers updates the patch helpers.
func (c *ControlPlane) SetPatchHelpers(patchHelpers map[string]*patch.Helper) {
	if c.machinesPatchHelpers == nil {
		c.machinesPatchHelpers = map[string]*patch.Helper{}
	}
	for machineName, patchHelper := range patchHelpers {
		c.machinesPatchHelpers[machineName] = patchHelper
	}
}

// GetWorkloadCluster builds a cluster object.
// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine.
func (c *ControlPlane) GetWorkloadCluster(ctx context.Context) (WorkloadCluster, error) {
	if c.workloadCluster != nil {
		return c.workloadCluster, nil
	}

	workloadCluster, err := c.managementCluster.GetWorkloadCluster(ctx, client.ObjectKeyFromObject(c.Cluster))
	if err != nil {
		return nil, err
	}
	c.workloadCluster = workloadCluster
	return c.workloadCluster, nil
}

// InjectTestManagementCluster allows injecting a test ManagementCluster during tests.
// NOTE: This approach allows keeping the managementCluster field private, which
// prevents people from using managementCluster.GetWorkloadCluster directly, because it creates a new
// instance of WorkloadCluster at every call. People should instead use ControlPlane.GetWorkloadCluster,
// which creates only a single instance of WorkloadCluster for each reconcile.
func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementCluster) {
	c.managementCluster = managementCluster
	c.workloadCluster = nil
}
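
// The sketch below illustrates how a reconciler might typically use this type: build the
// ControlPlane once per reconcile so that the KubeadmConfig, infra resource, and patch helper
// lookups are performed a single time, then reuse its cached WorkloadCluster instead of calling
// managementCluster.GetWorkloadCluster directly (see the note above). It is illustrative only;
// names such as r.Client, r.managementCluster, and ownedMachines are assumed caller-side
// variables, not part of this package.
//
//	controlPlane, err := NewControlPlane(ctx, r.managementCluster, r.Client, cluster, kcp, ownedMachines)
//	if err != nil {
//		return ctrl.Result{}, err
//	}
//
//	// Reuses the WorkloadCluster cached on the ControlPlane for the rest of the reconcile.
//	workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
//	if err != nil {
//		return ctrl.Result{}, err
//	}
//	_ = workloadCluster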