sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/etcd_client_generator.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package internal
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    26  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    27  	"k8s.io/apimachinery/pkg/util/sets"
    28  	"k8s.io/client-go/rest"
    29  
    30  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
    31  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy"
    32  )
    33  
// EtcdClientGenerator generates etcd clients that connect to specific etcd members on particular control plane nodes.
type EtcdClientGenerator struct {
	// restConfig is passed to the proxy as its KubeConfig whenever the default createClient runs.
	restConfig   *rest.Config
	// tlsConfig holds the TLS configuration that was given to NewEtcdClientGenerator.
	tlsConfig    *tls.Config
	// createClient builds an etcd client for a single endpoint.
	// NewEtcdClientGenerator installs the default implementation.
	createClient clientCreator
}
    40  
// clientCreator is the signature of the function an EtcdClientGenerator uses to
// build an etcd client for the given endpoint.
type clientCreator func(ctx context.Context, endpoint string) (*etcd.Client, error)
    42  
// errEtcdNodeConnection is a sentinel error. getLeaderClient aggregates it into the
// errors it returns when it cannot connect to, or list the members from, a single
// etcd node; forLeader detects it with errors.Is and moves on to the next node.
var errEtcdNodeConnection = errors.New("failed to connect to etcd node")
    44  
    45  // NewEtcdClientGenerator returns a new etcdClientGenerator instance.
    46  func NewEtcdClientGenerator(restConfig *rest.Config, tlsConfig *tls.Config, etcdDialTimeout, etcdCallTimeout time.Duration) *EtcdClientGenerator {
    47  	ecg := &EtcdClientGenerator{restConfig: restConfig, tlsConfig: tlsConfig}
    48  
    49  	ecg.createClient = func(ctx context.Context, endpoint string) (*etcd.Client, error) {
    50  		p := proxy.Proxy{
    51  			Kind:       "pods",
    52  			Namespace:  metav1.NamespaceSystem,
    53  			KubeConfig: ecg.restConfig,
    54  			Port:       2379,
    55  		}
    56  		return etcd.NewClient(ctx, etcd.ClientConfiguration{
    57  			Endpoint:    endpoint,
    58  			Proxy:       p,
    59  			TLSConfig:   tlsConfig,
    60  			DialTimeout: etcdDialTimeout,
    61  			CallTimeout: etcdCallTimeout,
    62  		})
    63  	}
    64  
    65  	return ecg
    66  }
    67  
    68  // forFirstAvailableNode takes a list of nodes and returns a client for the first one that connects.
    69  func (c *EtcdClientGenerator) forFirstAvailableNode(ctx context.Context, nodeNames []string) (*etcd.Client, error) {
    70  	// This is an additional safeguard for avoiding this func to return nil, nil.
    71  	if len(nodeNames) == 0 {
    72  		return nil, errors.New("invalid argument: forLeader can't be called with an empty list of nodes")
    73  	}
    74  
    75  	// Loop through the existing control plane nodes.
    76  	var errs []error
    77  	for _, name := range nodeNames {
    78  		endpoint := staticPodName("etcd", name)
    79  		client, err := c.createClient(ctx, endpoint)
    80  		if err != nil {
    81  			errs = append(errs, err)
    82  			continue
    83  		}
    84  		return client, nil
    85  	}
    86  	return nil, errors.Wrap(kerrors.NewAggregate(errs), "could not establish a connection to any etcd node")
    87  }
    88  
    89  // forLeader takes a list of nodes and returns a client to the leader node.
    90  func (c *EtcdClientGenerator) forLeader(ctx context.Context, nodeNames []string) (*etcd.Client, error) {
    91  	// This is an additional safeguard for avoiding this func to return nil, nil.
    92  	if len(nodeNames) == 0 {
    93  		return nil, errors.New("invalid argument: forLeader can't be called with an empty list of nodes")
    94  	}
    95  
    96  	nodes := sets.Set[string]{}
    97  	for _, n := range nodeNames {
    98  		nodes.Insert(n)
    99  	}
   100  
   101  	// Loop through the existing control plane nodes.
   102  	var errs []error
   103  	for _, nodeName := range nodeNames {
   104  		cl, err := c.getLeaderClient(ctx, nodeName, nodes)
   105  		if err != nil {
   106  			if errors.Is(err, errEtcdNodeConnection) {
   107  				errs = append(errs, err)
   108  				continue
   109  			}
   110  			return nil, err
   111  		}
   112  
   113  		return cl, nil
   114  	}
   115  	return nil, errors.Wrap(kerrors.NewAggregate(errs), "could not establish a connection to the etcd leader")
   116  }
   117  
   118  // getLeaderClient provides an etcd client connected to the leader. It returns an
   119  // errEtcdNodeConnection if there was a connection problem with the given etcd
   120  // node, which should be considered non-fatal by the caller.
   121  func (c *EtcdClientGenerator) getLeaderClient(ctx context.Context, nodeName string, allNodes sets.Set[string]) (*etcd.Client, error) {
   122  	// Get a temporary client to the etcd instance hosted on the node.
   123  	client, err := c.forFirstAvailableNode(ctx, []string{nodeName})
   124  	if err != nil {
   125  		return nil, kerrors.NewAggregate([]error{err, errEtcdNodeConnection})
   126  	}
   127  	defer client.Close()
   128  
   129  	// Get the list of members.
   130  	members, err := client.Members(ctx)
   131  	if err != nil {
   132  		return nil, kerrors.NewAggregate([]error{err, errEtcdNodeConnection})
   133  	}
   134  
   135  	// Get the leader member.
   136  	var leaderMember *etcd.Member
   137  	for _, member := range members {
   138  		if member.ID == client.LeaderID {
   139  			leaderMember = member
   140  			break
   141  		}
   142  	}
   143  
   144  	// If we found the leader, and it is one of the nodes,
   145  	// get a connection to the etcd leader via the node hosting it.
   146  	if leaderMember != nil {
   147  		if !allNodes.Has(leaderMember.Name) {
   148  			return nil, errors.Errorf("etcd leader is reported as %x with name %q, but we couldn't find a corresponding Node in the cluster", leaderMember.ID, leaderMember.Name)
   149  		}
   150  		client, err = c.forFirstAvailableNode(ctx, []string{leaderMember.Name})
   151  		return client, err
   152  	}
   153  
   154  	// If it is not possible to get a connection to the leader via existing nodes,
   155  	// it means that the control plane is an invalid state, with an etcd member - the current leader -
   156  	// without a corresponding node.
   157  	// TODO: In future we can eventually try to automatically remediate this condition by moving the leader
   158  	//  to another member with a corresponding node.
   159  	return nil, errors.Errorf("etcd leader is reported as %x, but we couldn't find any matching member", client.LeaderID)
   160  }