sigs.k8s.io/cluster-api@v1.6.3/controlplane/kubeadm/internal/workload_cluster_etcd.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package internal
    18  
    19  import (
    20  	"context"
    21  
    22  	"github.com/blang/semver/v4"
    23  	"github.com/pkg/errors"
    24  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    25  
    26  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    27  	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
    29  	etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util"
    30  )
    31  
// etcdClientFor abstracts how an etcd client is obtained for a set of candidate nodes.
// Callers in this file close the returned client when they are done with it.
type etcdClientFor interface {
	// forFirstAvailableNode returns an etcd client selected from nodeNames, or an error.
	// NOTE(review): presumably this picks the first node whose etcd can be reached;
	// the selection logic lives in the implementation — confirm there.
	forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error)
	// forLeader returns an etcd client selected from nodeNames, or an error.
	// NOTE(review): presumably the client talks to the current etcd leader — confirm
	// against the implementation.
	forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error)
}
    36  
    37  // ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes.
    38  // If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them.
    39  func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
    40  	allRemovedMembers := []string{}
    41  	allErrs := []error{}
    42  	for _, nodeName := range nodeNames {
    43  		removedMembers, errs := w.reconcileEtcdMember(ctx, nodeNames, nodeName, version)
    44  		allRemovedMembers = append(allRemovedMembers, removedMembers...)
    45  		allErrs = append(allErrs, errs...)
    46  	}
    47  
    48  	return allRemovedMembers, kerrors.NewAggregate(allErrs)
    49  }
    50  
    51  func (w *Workload) reconcileEtcdMember(ctx context.Context, nodeNames []string, nodeName string, version semver.Version) ([]string, []error) {
    52  	// Create the etcd Client for the etcd Pod scheduled on the Node
    53  	etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName})
    54  	if err != nil {
    55  		return nil, nil
    56  	}
    57  	defer etcdClient.Close()
    58  
    59  	members, err := etcdClient.Members(ctx)
    60  	if err != nil {
    61  		return nil, nil
    62  	}
    63  
    64  	// Check if any member's node is missing from workload cluster
    65  	// If any, delete it with best effort
    66  	removedMembers := []string{}
    67  	errs := []error{}
    68  loopmembers:
    69  	for _, member := range members {
    70  		// If this member is just added, it has a empty name until the etcd pod starts. Ignore it.
    71  		if member.Name == "" {
    72  			continue
    73  		}
    74  
    75  		for _, nodeName := range nodeNames {
    76  			if member.Name == nodeName {
    77  				// We found the matching node, continue with the outer loop.
    78  				continue loopmembers
    79  			}
    80  		}
    81  
    82  		// If we're here, the node cannot be found.
    83  		removedMembers = append(removedMembers, member.Name)
    84  		if err := w.removeMemberForNode(ctx, member.Name); err != nil {
    85  			errs = append(errs, err)
    86  		}
    87  
    88  		if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil {
    89  			errs = append(errs, err)
    90  		}
    91  	}
    92  	return removedMembers, errs
    93  }
    94  
    95  // UpdateEtcdVersionInKubeadmConfigMap sets the imageRepository or the imageTag or both in the kubeadm config map.
    96  func (w *Workload) UpdateEtcdVersionInKubeadmConfigMap(ctx context.Context, imageRepository, imageTag string, version semver.Version) error {
    97  	return w.updateClusterConfiguration(ctx, func(c *bootstrapv1.ClusterConfiguration) {
    98  		if c.Etcd.Local != nil {
    99  			c.Etcd.Local.ImageRepository = imageRepository
   100  			c.Etcd.Local.ImageTag = imageTag
   101  		}
   102  	}, version)
   103  }
   104  
   105  // UpdateEtcdExtraArgsInKubeadmConfigMap sets extraArgs in the kubeadm config map.
   106  func (w *Workload) UpdateEtcdExtraArgsInKubeadmConfigMap(ctx context.Context, extraArgs map[string]string, version semver.Version) error {
   107  	return w.updateClusterConfiguration(ctx, func(c *bootstrapv1.ClusterConfiguration) {
   108  		if c.Etcd.Local != nil {
   109  			c.Etcd.Local.ExtraArgs = extraArgs
   110  		}
   111  	}, version)
   112  }
   113  
   114  // RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster.
   115  // Removing the last remaining member of the cluster is not supported.
   116  func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error {
   117  	if machine == nil || machine.Status.NodeRef == nil {
   118  		// Nothing to do, no node for Machine
   119  		return nil
   120  	}
   121  	return w.removeMemberForNode(ctx, machine.Status.NodeRef.Name)
   122  }
   123  
   124  func (w *Workload) removeMemberForNode(ctx context.Context, name string) error {
   125  	controlPlaneNodes, err := w.getControlPlaneNodes(ctx)
   126  	if err != nil {
   127  		return err
   128  	}
   129  	if len(controlPlaneNodes.Items) < 2 {
   130  		return ErrControlPlaneMinNodes
   131  	}
   132  
   133  	// Exclude node being removed from etcd client node list
   134  	var remainingNodes []string
   135  	for _, n := range controlPlaneNodes.Items {
   136  		if n.Name != name {
   137  			remainingNodes = append(remainingNodes, n.Name)
   138  		}
   139  	}
   140  	etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, remainingNodes)
   141  	if err != nil {
   142  		return errors.Wrap(err, "failed to create etcd client")
   143  	}
   144  	defer etcdClient.Close()
   145  
   146  	// List etcd members. This checks that the member is healthy, because the request goes through consensus.
   147  	members, err := etcdClient.Members(ctx)
   148  	if err != nil {
   149  		return errors.Wrap(err, "failed to list etcd members using etcd client")
   150  	}
   151  	member := etcdutil.MemberForName(members, name)
   152  
   153  	// The member has already been removed, return immediately
   154  	if member == nil {
   155  		return nil
   156  	}
   157  
   158  	if err := etcdClient.RemoveMember(ctx, member.ID); err != nil {
   159  		return errors.Wrap(err, "failed to remove member from etcd")
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  // ForwardEtcdLeadership forwards etcd leadership to the first follower.
   166  func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error {
   167  	if machine == nil || machine.Status.NodeRef == nil {
   168  		return nil
   169  	}
   170  	if leaderCandidate == nil {
   171  		return errors.New("leader candidate cannot be nil")
   172  	}
   173  	if leaderCandidate.Status.NodeRef == nil {
   174  		return errors.New("leader has no node reference")
   175  	}
   176  
   177  	nodes, err := w.getControlPlaneNodes(ctx)
   178  	if err != nil {
   179  		return errors.Wrap(err, "failed to list control plane nodes")
   180  	}
   181  	nodeNames := make([]string, 0, len(nodes.Items))
   182  	for _, node := range nodes.Items {
   183  		nodeNames = append(nodeNames, node.Name)
   184  	}
   185  	etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames)
   186  	if err != nil {
   187  		return errors.Wrap(err, "failed to create etcd client")
   188  	}
   189  	defer etcdClient.Close()
   190  
   191  	members, err := etcdClient.Members(ctx)
   192  	if err != nil {
   193  		return errors.Wrap(err, "failed to list etcd members using etcd client")
   194  	}
   195  
   196  	currentMember := etcdutil.MemberForName(members, machine.Status.NodeRef.Name)
   197  	if currentMember == nil || currentMember.ID != etcdClient.LeaderID {
   198  		// nothing to do, this is not the etcd leader
   199  		return nil
   200  	}
   201  
   202  	// Move the leader to the provided candidate.
   203  	nextLeader := etcdutil.MemberForName(members, leaderCandidate.Status.NodeRef.Name)
   204  	if nextLeader == nil {
   205  		return errors.Errorf("failed to get etcd member from node %q", leaderCandidate.Status.NodeRef.Name)
   206  	}
   207  	if err := etcdClient.MoveLeader(ctx, nextLeader.ID); err != nil {
   208  		return errors.Wrapf(err, "failed to move leader")
   209  	}
   210  	return nil
   211  }
   212  
// EtcdMemberStatus contains status information for a single etcd member.
type EtcdMemberStatus struct {
	// Name identifies the etcd member.
	// NOTE(review): presumably the etcd member name, which elsewhere in this file is
	// matched against node names — confirm against users of this type.
	Name string
	// Responsive flags whether the member is responsive.
	// NOTE(review): how responsiveness is determined is not visible here — confirm
	// against the code that populates this field.
	Responsive bool
}
   218  
   219  // EtcdMembers returns the current set of members in an etcd cluster.
   220  //
   221  // NOTE: This methods uses control plane machines/nodes only to get in contact with etcd,
   222  // but then it relies on etcd as ultimate source of truth for the list of members.
   223  // This is intended to allow informed decisions on actions impacting etcd quorum.
   224  func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) {
   225  	nodes, err := w.getControlPlaneNodes(ctx)
   226  	if err != nil {
   227  		return nil, errors.Wrap(err, "failed to list control plane nodes")
   228  	}
   229  	nodeNames := make([]string, 0, len(nodes.Items))
   230  	for _, node := range nodes.Items {
   231  		nodeNames = append(nodeNames, node.Name)
   232  	}
   233  	etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames)
   234  	if err != nil {
   235  		return nil, errors.Wrap(err, "failed to create etcd client")
   236  	}
   237  	defer etcdClient.Close()
   238  
   239  	members, err := etcdClient.Members(ctx)
   240  	if err != nil {
   241  		return nil, errors.Wrap(err, "failed to list etcd members using etcd client")
   242  	}
   243  
   244  	names := []string{}
   245  	for _, member := range members {
   246  		names = append(names, member.Name)
   247  	}
   248  	return names, nil
   249  }