sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/workload_cluster_etcd.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package internal
    18  
    19  import (
    20  	"context"
    21  
    22  	"github.com/blang/semver/v4"
    23  	"github.com/pkg/errors"
    24  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    25  
    26  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    27  	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
    29  	etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util"
    30  )
    31  
    32  type etcdClientFor interface {
    33  	forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error)
    34  	forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error)
    35  }
    36  
    37  // ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes.
    38  // If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them.
    39  func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
    40  	allRemovedMembers := []string{}
    41  	allErrs := []error{}
    42  	for _, nodeName := range nodeNames {
    43  		removedMembers, errs := w.reconcileEtcdMember(ctx, nodeNames, nodeName, version)
    44  		allRemovedMembers = append(allRemovedMembers, removedMembers...)
    45  		allErrs = append(allErrs, errs...)
    46  	}
    47  
    48  	return allRemovedMembers, kerrors.NewAggregate(allErrs)
    49  }
    50  
    51  func (w *Workload) reconcileEtcdMember(ctx context.Context, nodeNames []string, nodeName string, version semver.Version) ([]string, []error) {
    52  	// Create the etcd Client for the etcd Pod scheduled on the Node
    53  	etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName})
    54  	if err != nil {
    55  		return nil, nil
    56  	}
    57  	defer etcdClient.Close()
    58  
    59  	members, err := etcdClient.Members(ctx)
    60  	if err != nil {
    61  		return nil, nil
    62  	}
    63  
    64  	// Check if any member's node is missing from workload cluster
    65  	// If any, delete it with best effort
    66  	removedMembers := []string{}
    67  	errs := []error{}
    68  loopmembers:
    69  	for _, member := range members {
    70  		// If this member is just added, it has a empty name until the etcd pod starts. Ignore it.
    71  		if member.Name == "" {
    72  			continue
    73  		}
    74  
    75  		for _, nodeName := range nodeNames {
    76  			if member.Name == nodeName {
    77  				// We found the matching node, continue with the outer loop.
    78  				continue loopmembers
    79  			}
    80  		}
    81  
    82  		// If we're here, the node cannot be found.
    83  		removedMembers = append(removedMembers, member.Name)
    84  		if err := w.removeMemberForNode(ctx, member.Name); err != nil {
    85  			errs = append(errs, err)
    86  		}
    87  
    88  		if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil {
    89  			errs = append(errs, err)
    90  		}
    91  	}
    92  	return removedMembers, errs
    93  }
    94  
    95  // UpdateEtcdLocalInKubeadmConfigMap sets etcd local configuration in the kubeadm config map.
    96  func (w *Workload) UpdateEtcdLocalInKubeadmConfigMap(etcdLocal *bootstrapv1.LocalEtcd) func(*bootstrapv1.ClusterConfiguration) {
    97  	return func(c *bootstrapv1.ClusterConfiguration) {
    98  		if c.Etcd.Local != nil {
    99  			c.Etcd.Local = etcdLocal
   100  		}
   101  	}
   102  }
   103  
   104  // UpdateEtcdExternalInKubeadmConfigMap sets etcd external configuration in the kubeadm config map.
   105  func (w *Workload) UpdateEtcdExternalInKubeadmConfigMap(etcdExternal *bootstrapv1.ExternalEtcd) func(*bootstrapv1.ClusterConfiguration) {
   106  	return func(c *bootstrapv1.ClusterConfiguration) {
   107  		if c.Etcd.External != nil {
   108  			c.Etcd.External = etcdExternal
   109  		}
   110  	}
   111  }
   112  
   113  // RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster.
   114  // Removing the last remaining member of the cluster is not supported.
   115  func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error {
   116  	if machine == nil || machine.Status.NodeRef == nil {
   117  		// Nothing to do, no node for Machine
   118  		return nil
   119  	}
   120  	return w.removeMemberForNode(ctx, machine.Status.NodeRef.Name)
   121  }
   122  
   123  func (w *Workload) removeMemberForNode(ctx context.Context, name string) error {
   124  	controlPlaneNodes, err := w.getControlPlaneNodes(ctx)
   125  	if err != nil {
   126  		return err
   127  	}
   128  	if len(controlPlaneNodes.Items) < 2 {
   129  		return ErrControlPlaneMinNodes
   130  	}
   131  
   132  	// Exclude node being removed from etcd client node list
   133  	var remainingNodes []string
   134  	for _, n := range controlPlaneNodes.Items {
   135  		if n.Name != name {
   136  			remainingNodes = append(remainingNodes, n.Name)
   137  		}
   138  	}
   139  	etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, remainingNodes)
   140  	if err != nil {
   141  		return errors.Wrap(err, "failed to create etcd client")
   142  	}
   143  	defer etcdClient.Close()
   144  
   145  	// List etcd members. This checks that the member is healthy, because the request goes through consensus.
   146  	members, err := etcdClient.Members(ctx)
   147  	if err != nil {
   148  		return errors.Wrap(err, "failed to list etcd members using etcd client")
   149  	}
   150  	member := etcdutil.MemberForName(members, name)
   151  
   152  	// The member has already been removed, return immediately
   153  	if member == nil {
   154  		return nil
   155  	}
   156  
   157  	if err := etcdClient.RemoveMember(ctx, member.ID); err != nil {
   158  		return errors.Wrap(err, "failed to remove member from etcd")
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  // ForwardEtcdLeadership forwards etcd leadership to the first follower.
   165  func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error {
   166  	if machine == nil || machine.Status.NodeRef == nil {
   167  		return nil
   168  	}
   169  	if leaderCandidate == nil {
   170  		return errors.New("leader candidate cannot be nil")
   171  	}
   172  	if leaderCandidate.Status.NodeRef == nil {
   173  		return errors.New("leader has no node reference")
   174  	}
   175  
   176  	nodes, err := w.getControlPlaneNodes(ctx)
   177  	if err != nil {
   178  		return errors.Wrap(err, "failed to list control plane nodes")
   179  	}
   180  	nodeNames := make([]string, 0, len(nodes.Items))
   181  	for _, node := range nodes.Items {
   182  		nodeNames = append(nodeNames, node.Name)
   183  	}
   184  	etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames)
   185  	if err != nil {
   186  		return errors.Wrap(err, "failed to create etcd client")
   187  	}
   188  	defer etcdClient.Close()
   189  
   190  	members, err := etcdClient.Members(ctx)
   191  	if err != nil {
   192  		return errors.Wrap(err, "failed to list etcd members using etcd client")
   193  	}
   194  
   195  	currentMember := etcdutil.MemberForName(members, machine.Status.NodeRef.Name)
   196  	if currentMember == nil || currentMember.ID != etcdClient.LeaderID {
   197  		// nothing to do, this is not the etcd leader
   198  		return nil
   199  	}
   200  
   201  	// Move the leader to the provided candidate.
   202  	nextLeader := etcdutil.MemberForName(members, leaderCandidate.Status.NodeRef.Name)
   203  	if nextLeader == nil {
   204  		return errors.Errorf("failed to get etcd member from node %q", leaderCandidate.Status.NodeRef.Name)
   205  	}
   206  	if err := etcdClient.MoveLeader(ctx, nextLeader.ID); err != nil {
   207  		return errors.Wrapf(err, "failed to move leader")
   208  	}
   209  	return nil
   210  }
   211  
   212  // EtcdMemberStatus contains status information for a single etcd member.
   213  type EtcdMemberStatus struct {
   214  	Name       string
   215  	Responsive bool
   216  }
   217  
   218  // EtcdMembers returns the current set of members in an etcd cluster.
   219  //
   220  // NOTE: This methods uses control plane machines/nodes only to get in contact with etcd,
   221  // but then it relies on etcd as ultimate source of truth for the list of members.
   222  // This is intended to allow informed decisions on actions impacting etcd quorum.
   223  func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) {
   224  	nodes, err := w.getControlPlaneNodes(ctx)
   225  	if err != nil {
   226  		return nil, errors.Wrap(err, "failed to list control plane nodes")
   227  	}
   228  	nodeNames := make([]string, 0, len(nodes.Items))
   229  	for _, node := range nodes.Items {
   230  		nodeNames = append(nodeNames, node.Name)
   231  	}
   232  	etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames)
   233  	if err != nil {
   234  		return nil, errors.Wrap(err, "failed to create etcd client")
   235  	}
   236  	defer etcdClient.Close()
   237  
   238  	members, err := etcdClient.Members(ctx)
   239  	if err != nil {
   240  		return nil, errors.Wrap(err, "failed to list etcd members using etcd client")
   241  	}
   242  
   243  	names := []string{}
   244  	for _, member := range members {
   245  		names = append(names, member.Name)
   246  	}
   247  	return names, nil
   248  }