sigs.k8s.io/cluster-api@v1.7.1/controlplane/kubeadm/internal/etcd/etcd.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package etcd 18 19 import ( 20 "context" 21 "crypto/tls" 22 "net" 23 "time" 24 25 "github.com/pkg/errors" 26 "go.etcd.io/etcd/api/v3/etcdserverpb" 27 clientv3 "go.etcd.io/etcd/client/v3" 28 "google.golang.org/grpc" 29 kerrors "k8s.io/apimachinery/pkg/util/errors" 30 31 "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy" 32 ) 33 34 // GRPCDial is a function that creates a connection to a given endpoint. 35 type GRPCDial func(ctx context.Context, addr string) (net.Conn, error) 36 37 // etcd wraps the etcd client from etcd's clientv3 package. 38 // This interface is implemented by both the clientv3 package and the backoff adapter that adds retries to the client. 39 type etcd interface { 40 AlarmList(ctx context.Context) (*clientv3.AlarmResponse, error) 41 Close() error 42 Endpoints() []string 43 MemberList(ctx context.Context) (*clientv3.MemberListResponse, error) 44 MemberRemove(ctx context.Context, id uint64) (*clientv3.MemberRemoveResponse, error) 45 MemberUpdate(ctx context.Context, id uint64, peerURLs []string) (*clientv3.MemberUpdateResponse, error) 46 MoveLeader(ctx context.Context, id uint64) (*clientv3.MoveLeaderResponse, error) 47 Status(ctx context.Context, endpoint string) (*clientv3.StatusResponse, error) 48 } 49 50 // Client wraps an etcd client formatting its output to something more consumable. 51 type Client struct { 52 EtcdClient etcd 53 Endpoint string 54 LeaderID uint64 55 Errors []string 56 CallTimeout time.Duration 57 } 58 59 // MemberAlarm represents an alarm type association with a cluster member. 60 type MemberAlarm struct { 61 // MemberID is the ID of the member associated with the raised alarm. 62 MemberID uint64 63 64 // Type is the type of alarm which has been raised. 65 Type AlarmType 66 } 67 68 // AlarmType defines the type of alarm for etcd. 69 type AlarmType int32 70 71 const ( 72 // AlarmOK denotes that the cluster member is OK. 73 AlarmOK AlarmType = iota 74 75 // AlarmNoSpace denotes that the cluster member has run out of disk space. 76 AlarmNoSpace 77 78 // AlarmCorrupt denotes that the cluster member has corrupted data. 79 AlarmCorrupt 80 ) 81 82 // DefaultCallTimeout represents the duration that the etcd client waits at most 83 // for read and write operations to etcd. 84 const DefaultCallTimeout = 15 * time.Second 85 86 // AlarmTypeName provides a text translation for AlarmType codes. 87 var AlarmTypeName = map[AlarmType]string{ 88 AlarmOK: "NONE", 89 AlarmNoSpace: "NOSPACE", 90 AlarmCorrupt: "CORRUPT", 91 } 92 93 // Adapted from kubeadm. 94 95 // Member struct defines an etcd member; it is used to avoid spreading 96 // github.com/coreos/etcd dependencies. 97 type Member struct { 98 // ClusterID is the ID of the cluster to which this member belongs 99 ClusterID uint64 100 101 // ID is the ID of this cluster member 102 ID uint64 103 104 // Name is the human-readable name of the member. If the member is not started, the name will be an empty string. 105 Name string 106 107 // PeerURLs is the list of URLs the member exposes to the cluster for communication. 108 PeerURLs []string 109 110 // ClientURLs is the list of URLs the member exposes to clients for communication. If the member is not started, clientURLs will be empty. 111 ClientURLs []string 112 113 // IsLearner indicates if the member is raft learner. 114 IsLearner bool 115 116 // Alarms is the list of alarms for a member. 117 Alarms []AlarmType 118 } 119 120 // pbMemberToMember converts the protobuf representation of a cluster member to a Member struct. 121 func pbMemberToMember(m *etcdserverpb.Member) *Member { 122 return &Member{ 123 ID: m.GetID(), 124 Name: m.GetName(), 125 PeerURLs: m.GetPeerURLs(), 126 ClientURLs: m.GetClientURLs(), 127 IsLearner: m.GetIsLearner(), 128 Alarms: []AlarmType{}, 129 } 130 } 131 132 // ClientConfiguration describes the configuration for an etcd client. 133 type ClientConfiguration struct { 134 Endpoint string 135 Proxy proxy.Proxy 136 TLSConfig *tls.Config 137 DialTimeout time.Duration 138 CallTimeout time.Duration 139 } 140 141 // NewClient creates a new etcd client with the given configuration. 142 func NewClient(ctx context.Context, config ClientConfiguration) (*Client, error) { 143 dialer, err := proxy.NewDialer(config.Proxy) 144 if err != nil { 145 return nil, errors.Wrap(err, "unable to create a dialer for etcd client") 146 } 147 148 etcdClient, err := clientv3.New(clientv3.Config{ 149 Endpoints: []string{config.Endpoint}, // NOTE: endpoint is used only as a host for certificate validation, the network connection is defined by DialOptions. 150 DialTimeout: config.DialTimeout, 151 DialOptions: []grpc.DialOption{ 152 grpc.WithBlock(), // block until the underlying connection is up 153 grpc.WithContextDialer(dialer.DialContextWithAddr), 154 }, 155 TLS: config.TLSConfig, 156 }) 157 if err != nil { 158 return nil, errors.Wrap(err, "unable to create etcd client") 159 } 160 161 callTimeout := config.CallTimeout 162 if callTimeout == 0 { 163 callTimeout = DefaultCallTimeout 164 } 165 166 client, err := newEtcdClient(ctx, etcdClient, callTimeout) 167 if err != nil { 168 closeErr := etcdClient.Close() 169 return nil, errors.Wrap(kerrors.NewAggregate([]error{err, closeErr}), "unable to create etcd client") 170 } 171 return client, nil 172 } 173 174 func newEtcdClient(ctx context.Context, etcdClient etcd, callTimeout time.Duration) (*Client, error) { 175 endpoints := etcdClient.Endpoints() 176 if len(endpoints) == 0 { 177 return nil, errors.New("etcd client was not configured with any endpoints") 178 } 179 180 ctx, cancel := context.WithTimeout(ctx, callTimeout) 181 defer cancel() 182 183 status, err := etcdClient.Status(ctx, endpoints[0]) 184 if err != nil { 185 return nil, errors.Wrap(err, "failed to get etcd status") 186 } 187 188 return &Client{ 189 Endpoint: endpoints[0], 190 EtcdClient: etcdClient, 191 LeaderID: status.Leader, 192 Errors: status.Errors, 193 CallTimeout: callTimeout, 194 }, nil 195 } 196 197 // Close closes the etcd client. 198 func (c *Client) Close() error { 199 return c.EtcdClient.Close() 200 } 201 202 // Members retrieves a list of etcd members. 203 func (c *Client) Members(ctx context.Context) ([]*Member, error) { 204 ctx, cancel := context.WithTimeout(ctx, c.CallTimeout) 205 defer cancel() 206 207 response, err := c.EtcdClient.MemberList(ctx) 208 if err != nil { 209 return nil, errors.Wrap(err, "failed to get list of members for etcd cluster") 210 } 211 212 alarms, err := c.Alarms(ctx) 213 if err != nil { 214 return nil, err 215 } 216 217 clusterID := response.Header.GetClusterId() 218 members := make([]*Member, 0) 219 for _, m := range response.Members { 220 newMember := pbMemberToMember(m) 221 newMember.ClusterID = clusterID 222 for _, c := range alarms { 223 if c.MemberID == newMember.ID { 224 newMember.Alarms = append(newMember.Alarms, c.Type) 225 } 226 } 227 members = append(members, newMember) 228 } 229 230 return members, nil 231 } 232 233 // MoveLeader moves the leader to the provided member ID. 234 func (c *Client) MoveLeader(ctx context.Context, newLeaderID uint64) error { 235 ctx, cancel := context.WithTimeout(ctx, c.CallTimeout) 236 defer cancel() 237 238 _, err := c.EtcdClient.MoveLeader(ctx, newLeaderID) 239 return errors.Wrapf(err, "failed to move etcd leader: %v", newLeaderID) 240 } 241 242 // RemoveMember removes a given member. 243 func (c *Client) RemoveMember(ctx context.Context, id uint64) error { 244 ctx, cancel := context.WithTimeout(ctx, c.CallTimeout) 245 defer cancel() 246 247 _, err := c.EtcdClient.MemberRemove(ctx, id) 248 return errors.Wrapf(err, "failed to remove member: %v", id) 249 } 250 251 // UpdateMemberPeerURLs updates the list of peer URLs. 252 func (c *Client) UpdateMemberPeerURLs(ctx context.Context, id uint64, peerURLs []string) ([]*Member, error) { 253 ctx, cancel := context.WithTimeout(ctx, c.CallTimeout) 254 defer cancel() 255 256 response, err := c.EtcdClient.MemberUpdate(ctx, id, peerURLs) 257 if err != nil { 258 return nil, errors.Wrapf(err, "failed to update etcd member %v's peer list to %+v", id, peerURLs) 259 } 260 261 members := make([]*Member, 0, len(response.Members)) 262 for _, m := range response.Members { 263 members = append(members, pbMemberToMember(m)) 264 } 265 266 return members, nil 267 } 268 269 // Alarms retrieves all alarms on a cluster. 270 func (c *Client) Alarms(ctx context.Context) ([]MemberAlarm, error) { 271 ctx, cancel := context.WithTimeout(ctx, c.CallTimeout) 272 defer cancel() 273 274 alarmResponse, err := c.EtcdClient.AlarmList(ctx) 275 if err != nil { 276 return nil, errors.Wrap(err, "failed to get alarms for etcd cluster") 277 } 278 279 memberAlarms := make([]MemberAlarm, 0, len(alarmResponse.Alarms)) 280 for _, a := range alarmResponse.Alarms { 281 memberAlarms = append(memberAlarms, MemberAlarm{ 282 MemberID: a.GetMemberID(), 283 Type: AlarmType(a.GetAlarm()), 284 }) 285 } 286 287 return memberAlarms, nil 288 }