sigs.k8s.io/cluster-api@v1.6.3/controlplane/kubeadm/internal/workload_cluster_etcd.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package internal 18 19 import ( 20 "context" 21 22 "github.com/blang/semver/v4" 23 "github.com/pkg/errors" 24 kerrors "k8s.io/apimachinery/pkg/util/errors" 25 26 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 27 bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" 28 "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" 29 etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" 30 ) 31 32 type etcdClientFor interface { 33 forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error) 34 forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error) 35 } 36 37 // ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes. 38 // If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them. 39 func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) { 40 allRemovedMembers := []string{} 41 allErrs := []error{} 42 for _, nodeName := range nodeNames { 43 removedMembers, errs := w.reconcileEtcdMember(ctx, nodeNames, nodeName, version) 44 allRemovedMembers = append(allRemovedMembers, removedMembers...) 45 allErrs = append(allErrs, errs...) 46 } 47 48 return allRemovedMembers, kerrors.NewAggregate(allErrs) 49 } 50 51 func (w *Workload) reconcileEtcdMember(ctx context.Context, nodeNames []string, nodeName string, version semver.Version) ([]string, []error) { 52 // Create the etcd Client for the etcd Pod scheduled on the Node 53 etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName}) 54 if err != nil { 55 return nil, nil 56 } 57 defer etcdClient.Close() 58 59 members, err := etcdClient.Members(ctx) 60 if err != nil { 61 return nil, nil 62 } 63 64 // Check if any member's node is missing from workload cluster 65 // If any, delete it with best effort 66 removedMembers := []string{} 67 errs := []error{} 68 loopmembers: 69 for _, member := range members { 70 // If this member is just added, it has a empty name until the etcd pod starts. Ignore it. 71 if member.Name == "" { 72 continue 73 } 74 75 for _, nodeName := range nodeNames { 76 if member.Name == nodeName { 77 // We found the matching node, continue with the outer loop. 78 continue loopmembers 79 } 80 } 81 82 // If we're here, the node cannot be found. 83 removedMembers = append(removedMembers, member.Name) 84 if err := w.removeMemberForNode(ctx, member.Name); err != nil { 85 errs = append(errs, err) 86 } 87 88 if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil { 89 errs = append(errs, err) 90 } 91 } 92 return removedMembers, errs 93 } 94 95 // UpdateEtcdVersionInKubeadmConfigMap sets the imageRepository or the imageTag or both in the kubeadm config map. 96 func (w *Workload) UpdateEtcdVersionInKubeadmConfigMap(ctx context.Context, imageRepository, imageTag string, version semver.Version) error { 97 return w.updateClusterConfiguration(ctx, func(c *bootstrapv1.ClusterConfiguration) { 98 if c.Etcd.Local != nil { 99 c.Etcd.Local.ImageRepository = imageRepository 100 c.Etcd.Local.ImageTag = imageTag 101 } 102 }, version) 103 } 104 105 // UpdateEtcdExtraArgsInKubeadmConfigMap sets extraArgs in the kubeadm config map. 106 func (w *Workload) UpdateEtcdExtraArgsInKubeadmConfigMap(ctx context.Context, extraArgs map[string]string, version semver.Version) error { 107 return w.updateClusterConfiguration(ctx, func(c *bootstrapv1.ClusterConfiguration) { 108 if c.Etcd.Local != nil { 109 c.Etcd.Local.ExtraArgs = extraArgs 110 } 111 }, version) 112 } 113 114 // RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster. 115 // Removing the last remaining member of the cluster is not supported. 116 func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error { 117 if machine == nil || machine.Status.NodeRef == nil { 118 // Nothing to do, no node for Machine 119 return nil 120 } 121 return w.removeMemberForNode(ctx, machine.Status.NodeRef.Name) 122 } 123 124 func (w *Workload) removeMemberForNode(ctx context.Context, name string) error { 125 controlPlaneNodes, err := w.getControlPlaneNodes(ctx) 126 if err != nil { 127 return err 128 } 129 if len(controlPlaneNodes.Items) < 2 { 130 return ErrControlPlaneMinNodes 131 } 132 133 // Exclude node being removed from etcd client node list 134 var remainingNodes []string 135 for _, n := range controlPlaneNodes.Items { 136 if n.Name != name { 137 remainingNodes = append(remainingNodes, n.Name) 138 } 139 } 140 etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, remainingNodes) 141 if err != nil { 142 return errors.Wrap(err, "failed to create etcd client") 143 } 144 defer etcdClient.Close() 145 146 // List etcd members. This checks that the member is healthy, because the request goes through consensus. 147 members, err := etcdClient.Members(ctx) 148 if err != nil { 149 return errors.Wrap(err, "failed to list etcd members using etcd client") 150 } 151 member := etcdutil.MemberForName(members, name) 152 153 // The member has already been removed, return immediately 154 if member == nil { 155 return nil 156 } 157 158 if err := etcdClient.RemoveMember(ctx, member.ID); err != nil { 159 return errors.Wrap(err, "failed to remove member from etcd") 160 } 161 162 return nil 163 } 164 165 // ForwardEtcdLeadership forwards etcd leadership to the first follower. 166 func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error { 167 if machine == nil || machine.Status.NodeRef == nil { 168 return nil 169 } 170 if leaderCandidate == nil { 171 return errors.New("leader candidate cannot be nil") 172 } 173 if leaderCandidate.Status.NodeRef == nil { 174 return errors.New("leader has no node reference") 175 } 176 177 nodes, err := w.getControlPlaneNodes(ctx) 178 if err != nil { 179 return errors.Wrap(err, "failed to list control plane nodes") 180 } 181 nodeNames := make([]string, 0, len(nodes.Items)) 182 for _, node := range nodes.Items { 183 nodeNames = append(nodeNames, node.Name) 184 } 185 etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames) 186 if err != nil { 187 return errors.Wrap(err, "failed to create etcd client") 188 } 189 defer etcdClient.Close() 190 191 members, err := etcdClient.Members(ctx) 192 if err != nil { 193 return errors.Wrap(err, "failed to list etcd members using etcd client") 194 } 195 196 currentMember := etcdutil.MemberForName(members, machine.Status.NodeRef.Name) 197 if currentMember == nil || currentMember.ID != etcdClient.LeaderID { 198 // nothing to do, this is not the etcd leader 199 return nil 200 } 201 202 // Move the leader to the provided candidate. 203 nextLeader := etcdutil.MemberForName(members, leaderCandidate.Status.NodeRef.Name) 204 if nextLeader == nil { 205 return errors.Errorf("failed to get etcd member from node %q", leaderCandidate.Status.NodeRef.Name) 206 } 207 if err := etcdClient.MoveLeader(ctx, nextLeader.ID); err != nil { 208 return errors.Wrapf(err, "failed to move leader") 209 } 210 return nil 211 } 212 213 // EtcdMemberStatus contains status information for a single etcd member. 214 type EtcdMemberStatus struct { 215 Name string 216 Responsive bool 217 } 218 219 // EtcdMembers returns the current set of members in an etcd cluster. 220 // 221 // NOTE: This methods uses control plane machines/nodes only to get in contact with etcd, 222 // but then it relies on etcd as ultimate source of truth for the list of members. 223 // This is intended to allow informed decisions on actions impacting etcd quorum. 224 func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) { 225 nodes, err := w.getControlPlaneNodes(ctx) 226 if err != nil { 227 return nil, errors.Wrap(err, "failed to list control plane nodes") 228 } 229 nodeNames := make([]string, 0, len(nodes.Items)) 230 for _, node := range nodes.Items { 231 nodeNames = append(nodeNames, node.Name) 232 } 233 etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames) 234 if err != nil { 235 return nil, errors.Wrap(err, "failed to create etcd client") 236 } 237 defer etcdClient.Close() 238 239 members, err := etcdClient.Members(ctx) 240 if err != nil { 241 return nil, errors.Wrap(err, "failed to list etcd members using etcd client") 242 } 243 244 names := []string{} 245 for _, member := range members { 246 names = append(names, member.Name) 247 } 248 return names, nil 249 }