sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/workload_cluster_etcd.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package internal 18 19 import ( 20 "context" 21 22 "github.com/blang/semver/v4" 23 "github.com/pkg/errors" 24 kerrors "k8s.io/apimachinery/pkg/util/errors" 25 26 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 27 bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" 28 "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" 29 etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" 30 ) 31 32 type etcdClientFor interface { 33 forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error) 34 forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error) 35 } 36 37 // ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes. 38 // If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them. 39 func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) { 40 allRemovedMembers := []string{} 41 allErrs := []error{} 42 for _, nodeName := range nodeNames { 43 removedMembers, errs := w.reconcileEtcdMember(ctx, nodeNames, nodeName, version) 44 allRemovedMembers = append(allRemovedMembers, removedMembers...) 45 allErrs = append(allErrs, errs...) 46 } 47 48 return allRemovedMembers, kerrors.NewAggregate(allErrs) 49 } 50 51 func (w *Workload) reconcileEtcdMember(ctx context.Context, nodeNames []string, nodeName string, version semver.Version) ([]string, []error) { 52 // Create the etcd Client for the etcd Pod scheduled on the Node 53 etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName}) 54 if err != nil { 55 return nil, nil 56 } 57 defer etcdClient.Close() 58 59 members, err := etcdClient.Members(ctx) 60 if err != nil { 61 return nil, nil 62 } 63 64 // Check if any member's node is missing from workload cluster 65 // If any, delete it with best effort 66 removedMembers := []string{} 67 errs := []error{} 68 loopmembers: 69 for _, member := range members { 70 // If this member is just added, it has a empty name until the etcd pod starts. Ignore it. 71 if member.Name == "" { 72 continue 73 } 74 75 for _, nodeName := range nodeNames { 76 if member.Name == nodeName { 77 // We found the matching node, continue with the outer loop. 78 continue loopmembers 79 } 80 } 81 82 // If we're here, the node cannot be found. 83 removedMembers = append(removedMembers, member.Name) 84 if err := w.removeMemberForNode(ctx, member.Name); err != nil { 85 errs = append(errs, err) 86 } 87 88 if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil { 89 errs = append(errs, err) 90 } 91 } 92 return removedMembers, errs 93 } 94 95 // UpdateEtcdLocalInKubeadmConfigMap sets etcd local configuration in the kubeadm config map. 96 func (w *Workload) UpdateEtcdLocalInKubeadmConfigMap(etcdLocal *bootstrapv1.LocalEtcd) func(*bootstrapv1.ClusterConfiguration) { 97 return func(c *bootstrapv1.ClusterConfiguration) { 98 if c.Etcd.Local != nil { 99 c.Etcd.Local = etcdLocal 100 } 101 } 102 } 103 104 // UpdateEtcdExternalInKubeadmConfigMap sets etcd external configuration in the kubeadm config map. 105 func (w *Workload) UpdateEtcdExternalInKubeadmConfigMap(etcdExternal *bootstrapv1.ExternalEtcd) func(*bootstrapv1.ClusterConfiguration) { 106 return func(c *bootstrapv1.ClusterConfiguration) { 107 if c.Etcd.External != nil { 108 c.Etcd.External = etcdExternal 109 } 110 } 111 } 112 113 // RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster. 114 // Removing the last remaining member of the cluster is not supported. 115 func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error { 116 if machine == nil || machine.Status.NodeRef == nil { 117 // Nothing to do, no node for Machine 118 return nil 119 } 120 return w.removeMemberForNode(ctx, machine.Status.NodeRef.Name) 121 } 122 123 func (w *Workload) removeMemberForNode(ctx context.Context, name string) error { 124 controlPlaneNodes, err := w.getControlPlaneNodes(ctx) 125 if err != nil { 126 return err 127 } 128 if len(controlPlaneNodes.Items) < 2 { 129 return ErrControlPlaneMinNodes 130 } 131 132 // Exclude node being removed from etcd client node list 133 var remainingNodes []string 134 for _, n := range controlPlaneNodes.Items { 135 if n.Name != name { 136 remainingNodes = append(remainingNodes, n.Name) 137 } 138 } 139 etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, remainingNodes) 140 if err != nil { 141 return errors.Wrap(err, "failed to create etcd client") 142 } 143 defer etcdClient.Close() 144 145 // List etcd members. This checks that the member is healthy, because the request goes through consensus. 146 members, err := etcdClient.Members(ctx) 147 if err != nil { 148 return errors.Wrap(err, "failed to list etcd members using etcd client") 149 } 150 member := etcdutil.MemberForName(members, name) 151 152 // The member has already been removed, return immediately 153 if member == nil { 154 return nil 155 } 156 157 if err := etcdClient.RemoveMember(ctx, member.ID); err != nil { 158 return errors.Wrap(err, "failed to remove member from etcd") 159 } 160 161 return nil 162 } 163 164 // ForwardEtcdLeadership forwards etcd leadership to the first follower. 165 func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error { 166 if machine == nil || machine.Status.NodeRef == nil { 167 return nil 168 } 169 if leaderCandidate == nil { 170 return errors.New("leader candidate cannot be nil") 171 } 172 if leaderCandidate.Status.NodeRef == nil { 173 return errors.New("leader has no node reference") 174 } 175 176 nodes, err := w.getControlPlaneNodes(ctx) 177 if err != nil { 178 return errors.Wrap(err, "failed to list control plane nodes") 179 } 180 nodeNames := make([]string, 0, len(nodes.Items)) 181 for _, node := range nodes.Items { 182 nodeNames = append(nodeNames, node.Name) 183 } 184 etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames) 185 if err != nil { 186 return errors.Wrap(err, "failed to create etcd client") 187 } 188 defer etcdClient.Close() 189 190 members, err := etcdClient.Members(ctx) 191 if err != nil { 192 return errors.Wrap(err, "failed to list etcd members using etcd client") 193 } 194 195 currentMember := etcdutil.MemberForName(members, machine.Status.NodeRef.Name) 196 if currentMember == nil || currentMember.ID != etcdClient.LeaderID { 197 // nothing to do, this is not the etcd leader 198 return nil 199 } 200 201 // Move the leader to the provided candidate. 202 nextLeader := etcdutil.MemberForName(members, leaderCandidate.Status.NodeRef.Name) 203 if nextLeader == nil { 204 return errors.Errorf("failed to get etcd member from node %q", leaderCandidate.Status.NodeRef.Name) 205 } 206 if err := etcdClient.MoveLeader(ctx, nextLeader.ID); err != nil { 207 return errors.Wrapf(err, "failed to move leader") 208 } 209 return nil 210 } 211 212 // EtcdMemberStatus contains status information for a single etcd member. 213 type EtcdMemberStatus struct { 214 Name string 215 Responsive bool 216 } 217 218 // EtcdMembers returns the current set of members in an etcd cluster. 219 // 220 // NOTE: This methods uses control plane machines/nodes only to get in contact with etcd, 221 // but then it relies on etcd as ultimate source of truth for the list of members. 222 // This is intended to allow informed decisions on actions impacting etcd quorum. 223 func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) { 224 nodes, err := w.getControlPlaneNodes(ctx) 225 if err != nil { 226 return nil, errors.Wrap(err, "failed to list control plane nodes") 227 } 228 nodeNames := make([]string, 0, len(nodes.Items)) 229 for _, node := range nodes.Items { 230 nodeNames = append(nodeNames, node.Name) 231 } 232 etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames) 233 if err != nil { 234 return nil, errors.Wrap(err, "failed to create etcd client") 235 } 236 defer etcdClient.Close() 237 238 members, err := etcdClient.Members(ctx) 239 if err != nil { 240 return nil, errors.Wrap(err, "failed to list etcd members using etcd client") 241 } 242 243 names := []string{} 244 for _, member := range members { 245 names = append(names, member.Name) 246 } 247 return names, nil 248 }