sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/etcd/etcd.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package etcd
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"net"
    23  	"time"
    24  
    25  	"github.com/pkg/errors"
    26  	"go.etcd.io/etcd/api/v3/etcdserverpb"
    27  	clientv3 "go.etcd.io/etcd/client/v3"
    28  	"google.golang.org/grpc"
    29  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    30  
    31  	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy"
    32  )
    33  
// GRPCDial is a function that creates a connection to a given endpoint.
// It receives a context and the address to connect to, and returns either
// the resulting net.Conn or an error.
type GRPCDial func(ctx context.Context, addr string) (net.Conn, error)
    36  
// etcd wraps the etcd client from etcd's clientv3 package.
// This interface is implemented by both the clientv3 package and the backoff adapter that adds retries to the client.
// It lists only the calls that Client needs, which also makes it the seam
// through which newEtcdClient accepts any implementation.
type etcd interface {
	// AlarmList is used by Client.Alarms to fetch the alarms of the cluster.
	AlarmList(ctx context.Context) (*clientv3.AlarmResponse, error)
	// Close is used by Client.Close.
	Close() error
	// Endpoints returns the configured endpoints; newEtcdClient uses the first one.
	Endpoints() []string
	// MemberList is used by Client.Members to fetch the members of the cluster.
	MemberList(ctx context.Context) (*clientv3.MemberListResponse, error)
	// MemberRemove is used by Client.RemoveMember.
	MemberRemove(ctx context.Context, id uint64) (*clientv3.MemberRemoveResponse, error)
	// MemberUpdate is used by Client.UpdateMemberPeerURLs.
	MemberUpdate(ctx context.Context, id uint64, peerURLs []string) (*clientv3.MemberUpdateResponse, error)
	// MoveLeader is used by Client.MoveLeader.
	MoveLeader(ctx context.Context, id uint64) (*clientv3.MoveLeaderResponse, error)
	// Status is used by newEtcdClient to read the leader ID and the errors
	// reported for the given endpoint.
	Status(ctx context.Context, endpoint string) (*clientv3.StatusResponse, error)
}
    49  
// Client wraps an etcd client formatting its output to something more consumable.
//
// Fields, as populated by newEtcdClient:
//   - EtcdClient is the underlying etcd implementation every method delegates to.
//   - Endpoint is the first endpoint reported by the underlying client.
//   - LeaderID is the leader ID taken from the Status response for Endpoint
//     at creation time; it is not refreshed by any method in this file.
//   - Errors holds the errors taken from that same Status response.
//   - CallTimeout is the timeout applied to the context of every call made
//     through this Client.
type Client struct {
	EtcdClient  etcd
	Endpoint    string
	LeaderID    uint64
	Errors      []string
	CallTimeout time.Duration
}
    58  
// MemberAlarm represents an alarm type association with a cluster member.
// Client.Alarms returns one MemberAlarm per alarm entry found in the
// AlarmList response.
type MemberAlarm struct {
	// MemberID is the ID of the member associated with the raised alarm.
	MemberID uint64

	// Type is the type of alarm which has been raised.
	Type AlarmType
}
    67  
    68  // AlarmType defines the type of alarm for etcd.
    69  type AlarmType int32
    70  
    71  const (
    72  	// AlarmOK denotes that the cluster member is OK.
    73  	AlarmOK AlarmType = iota
    74  
    75  	// AlarmNoSpace denotes that the cluster member has run out of disk space.
    76  	AlarmNoSpace
    77  
    78  	// AlarmCorrupt denotes that the cluster member has corrupted data.
    79  	AlarmCorrupt
    80  )
    81  
    82  // DefaultCallTimeout represents the duration that the etcd client waits at most
    83  // for read and write operations to etcd.
    84  const DefaultCallTimeout = 15 * time.Second
    85  
// AlarmTypeName provides a text translation for AlarmType codes.
// Only the three AlarmType constants declared in this package have an entry;
// indexing the map with any other value yields the empty string.
var AlarmTypeName = map[AlarmType]string{
	AlarmOK:      "NONE",
	AlarmNoSpace: "NOSPACE",
	AlarmCorrupt: "CORRUPT",
}
    92  
    93  // Adapted from kubeadm.
    94  
// Member struct defines an etcd member; it is used to avoid spreading
// etcd client (go.etcd.io/etcd) dependencies to the callers of this package.
type Member struct {
	// ClusterID is the ID of the cluster to which this member belongs.
	// It is filled in by Client.Members from the response header; members
	// returned by pbMemberToMember alone leave it at zero.
	ClusterID uint64

	// ID is the ID of this cluster member
	ID uint64

	// Name is the human-readable name of the member. If the member is not started, the name will be an empty string.
	Name string

	// PeerURLs is the list of URLs the member exposes to the cluster for communication.
	PeerURLs []string

	// ClientURLs is the list of URLs the member exposes to clients for communication. If the member is not started, clientURLs will be empty.
	ClientURLs []string

	// IsLearner indicates if the member is raft learner.
	IsLearner bool

	// Alarms is the list of alarms for a member.
	// pbMemberToMember initializes it to an empty, non-nil slice and
	// Client.Members appends the alarms whose member ID matches ID.
	Alarms []AlarmType
}
   119  
   120  // pbMemberToMember converts the protobuf representation of a cluster member to a Member struct.
   121  func pbMemberToMember(m *etcdserverpb.Member) *Member {
   122  	return &Member{
   123  		ID:         m.GetID(),
   124  		Name:       m.GetName(),
   125  		PeerURLs:   m.GetPeerURLs(),
   126  		ClientURLs: m.GetClientURLs(),
   127  		IsLearner:  m.GetIsLearner(),
   128  		Alarms:     []AlarmType{},
   129  	}
   130  }
   131  
// ClientConfiguration describes the configuration for an etcd client.
//
// Fields, as consumed by NewClient:
//   - Endpoint is the single endpoint handed to the etcd client.
//   - Proxy is used to build the dialer that opens the network connection.
//   - TLSConfig is passed to the etcd client as its TLS configuration.
//   - DialTimeout is passed to the etcd client as its dial timeout.
//   - CallTimeout bounds each call made through the resulting Client; when
//     it is zero, DefaultCallTimeout is used instead.
type ClientConfiguration struct {
	Endpoint    string
	Proxy       proxy.Proxy
	TLSConfig   *tls.Config
	DialTimeout time.Duration
	CallTimeout time.Duration
}
   140  
   141  // NewClient creates a new etcd client with the given configuration.
   142  func NewClient(ctx context.Context, config ClientConfiguration) (*Client, error) {
   143  	dialer, err := proxy.NewDialer(config.Proxy)
   144  	if err != nil {
   145  		return nil, errors.Wrap(err, "unable to create a dialer for etcd client")
   146  	}
   147  
   148  	etcdClient, err := clientv3.New(clientv3.Config{
   149  		Endpoints:   []string{config.Endpoint}, // NOTE: endpoint is used only as a host for certificate validation, the network connection is defined by DialOptions.
   150  		DialTimeout: config.DialTimeout,
   151  		DialOptions: []grpc.DialOption{
   152  			grpc.WithBlock(), // block until the underlying connection is up
   153  			grpc.WithContextDialer(dialer.DialContextWithAddr),
   154  		},
   155  		TLS: config.TLSConfig,
   156  	})
   157  	if err != nil {
   158  		return nil, errors.Wrap(err, "unable to create etcd client")
   159  	}
   160  
   161  	callTimeout := config.CallTimeout
   162  	if callTimeout == 0 {
   163  		callTimeout = DefaultCallTimeout
   164  	}
   165  
   166  	client, err := newEtcdClient(ctx, etcdClient, callTimeout)
   167  	if err != nil {
   168  		closeErr := etcdClient.Close()
   169  		return nil, errors.Wrap(kerrors.NewAggregate([]error{err, closeErr}), "unable to create etcd client")
   170  	}
   171  	return client, nil
   172  }
   173  
   174  func newEtcdClient(ctx context.Context, etcdClient etcd, callTimeout time.Duration) (*Client, error) {
   175  	endpoints := etcdClient.Endpoints()
   176  	if len(endpoints) == 0 {
   177  		return nil, errors.New("etcd client was not configured with any endpoints")
   178  	}
   179  
   180  	ctx, cancel := context.WithTimeout(ctx, callTimeout)
   181  	defer cancel()
   182  
   183  	status, err := etcdClient.Status(ctx, endpoints[0])
   184  	if err != nil {
   185  		return nil, errors.Wrap(err, "failed to get etcd status")
   186  	}
   187  
   188  	return &Client{
   189  		Endpoint:    endpoints[0],
   190  		EtcdClient:  etcdClient,
   191  		LeaderID:    status.Leader,
   192  		Errors:      status.Errors,
   193  		CallTimeout: callTimeout,
   194  	}, nil
   195  }
   196  
   197  // Close closes the etcd client.
   198  func (c *Client) Close() error {
   199  	return c.EtcdClient.Close()
   200  }
   201  
   202  // Members retrieves a list of etcd members.
   203  func (c *Client) Members(ctx context.Context) ([]*Member, error) {
   204  	ctx, cancel := context.WithTimeout(ctx, c.CallTimeout)
   205  	defer cancel()
   206  
   207  	response, err := c.EtcdClient.MemberList(ctx)
   208  	if err != nil {
   209  		return nil, errors.Wrap(err, "failed to get list of members for etcd cluster")
   210  	}
   211  
   212  	alarms, err := c.Alarms(ctx)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  
   217  	clusterID := response.Header.GetClusterId()
   218  	members := make([]*Member, 0)
   219  	for _, m := range response.Members {
   220  		newMember := pbMemberToMember(m)
   221  		newMember.ClusterID = clusterID
   222  		for _, c := range alarms {
   223  			if c.MemberID == newMember.ID {
   224  				newMember.Alarms = append(newMember.Alarms, c.Type)
   225  			}
   226  		}
   227  		members = append(members, newMember)
   228  	}
   229  
   230  	return members, nil
   231  }
   232  
   233  // MoveLeader moves the leader to the provided member ID.
   234  func (c *Client) MoveLeader(ctx context.Context, newLeaderID uint64) error {
   235  	ctx, cancel := context.WithTimeout(ctx, c.CallTimeout)
   236  	defer cancel()
   237  
   238  	_, err := c.EtcdClient.MoveLeader(ctx, newLeaderID)
   239  	return errors.Wrapf(err, "failed to move etcd leader: %v", newLeaderID)
   240  }
   241  
   242  // RemoveMember removes a given member.
   243  func (c *Client) RemoveMember(ctx context.Context, id uint64) error {
   244  	ctx, cancel := context.WithTimeout(ctx, c.CallTimeout)
   245  	defer cancel()
   246  
   247  	_, err := c.EtcdClient.MemberRemove(ctx, id)
   248  	return errors.Wrapf(err, "failed to remove member: %v", id)
   249  }
   250  
   251  // UpdateMemberPeerURLs updates the list of peer URLs.
   252  func (c *Client) UpdateMemberPeerURLs(ctx context.Context, id uint64, peerURLs []string) ([]*Member, error) {
   253  	ctx, cancel := context.WithTimeout(ctx, c.CallTimeout)
   254  	defer cancel()
   255  
   256  	response, err := c.EtcdClient.MemberUpdate(ctx, id, peerURLs)
   257  	if err != nil {
   258  		return nil, errors.Wrapf(err, "failed to update etcd member %v's peer list to %+v", id, peerURLs)
   259  	}
   260  
   261  	members := make([]*Member, 0, len(response.Members))
   262  	for _, m := range response.Members {
   263  		members = append(members, pbMemberToMember(m))
   264  	}
   265  
   266  	return members, nil
   267  }
   268  
   269  // Alarms retrieves all alarms on a cluster.
   270  func (c *Client) Alarms(ctx context.Context) ([]MemberAlarm, error) {
   271  	ctx, cancel := context.WithTimeout(ctx, c.CallTimeout)
   272  	defer cancel()
   273  
   274  	alarmResponse, err := c.EtcdClient.AlarmList(ctx)
   275  	if err != nil {
   276  		return nil, errors.Wrap(err, "failed to get alarms for etcd cluster")
   277  	}
   278  
   279  	memberAlarms := make([]MemberAlarm, 0, len(alarmResponse.Alarms))
   280  	for _, a := range alarmResponse.Alarms {
   281  		memberAlarms = append(memberAlarms, MemberAlarm{
   282  			MemberID: a.GetMemberID(),
   283  			Type:     AlarmType(a.GetAlarm()),
   284  		})
   285  	}
   286  
   287  	return memberAlarms, nil
   288  }