go.etcd.io/etcd@v3.3.27+incompatible/embed/etcd.go (about) 1 // Copyright 2016 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package embed 16 17 import ( 18 "context" 19 "crypto/tls" 20 "fmt" 21 "io/ioutil" 22 defaultLog "log" 23 "net" 24 "net/http" 25 "net/url" 26 "strconv" 27 "sync" 28 "time" 29 30 "github.com/coreos/etcd/compactor" 31 "github.com/coreos/etcd/etcdserver" 32 "github.com/coreos/etcd/etcdserver/api/etcdhttp" 33 "github.com/coreos/etcd/etcdserver/api/v2http" 34 "github.com/coreos/etcd/etcdserver/api/v2v3" 35 "github.com/coreos/etcd/etcdserver/api/v3client" 36 "github.com/coreos/etcd/etcdserver/api/v3rpc" 37 "github.com/coreos/etcd/pkg/cors" 38 "github.com/coreos/etcd/pkg/debugutil" 39 runtimeutil "github.com/coreos/etcd/pkg/runtime" 40 "github.com/coreos/etcd/pkg/transport" 41 "github.com/coreos/etcd/pkg/types" 42 "github.com/coreos/etcd/rafthttp" 43 44 "github.com/coreos/pkg/capnslog" 45 grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" 46 "github.com/soheilhy/cmux" 47 "google.golang.org/grpc" 48 "google.golang.org/grpc/keepalive" 49 ) 50 51 var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "embed") 52 53 const ( 54 // internal fd usage includes disk usage and transport usage. 55 // To read/write snapshot, snap pkg needs 1. In normal case, wal pkg needs 56 // at most 2 to read/lock/write WALs. One case that it needs to 2 is to 57 // read all logs after some snapshot index, which locates at the end of 58 // the second last and the head of the last. For purging, it needs to read 59 // directory, so it needs 1. For fd monitor, it needs 1. 60 // For transport, rafthttp builds two long-polling connections and at most 61 // four temporary connections with each member. There are at most 9 members 62 // in a cluster, so it should reserve 96. 63 // For the safety, we set the total reserved number to 150. 64 reservedInternalFDNum = 150 65 ) 66 67 // Etcd contains a running etcd server and its listeners. 68 type Etcd struct { 69 Peers []*peerListener 70 Clients []net.Listener 71 // a map of contexts for the servers that serves client requests. 72 sctxs map[string]*serveCtx 73 metricsListeners []net.Listener 74 75 Server *etcdserver.EtcdServer 76 77 cfg Config 78 stopc chan struct{} 79 errc chan error 80 81 closeOnce sync.Once 82 } 83 84 type peerListener struct { 85 net.Listener 86 serve func() error 87 close func(context.Context) error 88 } 89 90 // StartEtcd launches the etcd server and HTTP handlers for client/server communication. 91 // The returned Etcd.Server is not guaranteed to have joined the cluster. Wait 92 // on the Etcd.Server.ReadyNotify() channel to know when it completes and is ready for use. 93 func StartEtcd(inCfg *Config) (e *Etcd, err error) { 94 if err = inCfg.Validate(); err != nil { 95 return nil, err 96 } 97 serving := false 98 e = &Etcd{cfg: *inCfg, stopc: make(chan struct{})} 99 cfg := &e.cfg 100 defer func() { 101 if e == nil || err == nil { 102 return 103 } 104 if !serving { 105 // errored before starting gRPC server for serveCtx.serversC 106 for _, sctx := range e.sctxs { 107 close(sctx.serversC) 108 } 109 } 110 e.Close() 111 e = nil 112 }() 113 114 if e.Peers, err = startPeerListeners(cfg); err != nil { 115 return e, err 116 } 117 if e.sctxs, err = startClientListeners(cfg); err != nil { 118 return e, err 119 } 120 for _, sctx := range e.sctxs { 121 e.Clients = append(e.Clients, sctx.l) 122 } 123 124 var ( 125 urlsmap types.URLsMap 126 token string 127 ) 128 129 memberInitialized := true 130 if !isMemberInitialized(cfg) { 131 memberInitialized = false 132 urlsmap, token, err = cfg.PeerURLsMapAndToken("etcd") 133 if err != nil { 134 return e, fmt.Errorf("error setting up initial cluster: %v", err) 135 } 136 } 137 138 // AutoCompactionRetention defaults to "0" if not set. 139 if len(cfg.AutoCompactionRetention) == 0 { 140 cfg.AutoCompactionRetention = "0" 141 } 142 autoCompactionRetention, err := parseCompactionRetention(cfg.AutoCompactionMode, cfg.AutoCompactionRetention) 143 if err != nil { 144 return e, err 145 } 146 147 srvcfg := etcdserver.ServerConfig{ 148 Name: cfg.Name, 149 ClientURLs: cfg.ACUrls, 150 PeerURLs: cfg.APUrls, 151 DataDir: cfg.Dir, 152 DedicatedWALDir: cfg.WalDir, 153 SnapCount: cfg.SnapCount, 154 MaxSnapFiles: cfg.MaxSnapFiles, 155 MaxWALFiles: cfg.MaxWalFiles, 156 InitialPeerURLsMap: urlsmap, 157 InitialClusterToken: token, 158 DiscoveryURL: cfg.Durl, 159 DiscoveryProxy: cfg.Dproxy, 160 NewCluster: cfg.IsNewCluster(), 161 ForceNewCluster: cfg.ForceNewCluster, 162 PeerTLSInfo: cfg.PeerTLSInfo, 163 TickMs: cfg.TickMs, 164 ElectionTicks: cfg.ElectionTicks(), 165 InitialElectionTickAdvance: cfg.InitialElectionTickAdvance, 166 AutoCompactionRetention: autoCompactionRetention, 167 AutoCompactionMode: cfg.AutoCompactionMode, 168 QuotaBackendBytes: cfg.QuotaBackendBytes, 169 MaxTxnOps: cfg.MaxTxnOps, 170 MaxRequestBytes: cfg.MaxRequestBytes, 171 StrictReconfigCheck: cfg.StrictReconfigCheck, 172 ClientCertAuthEnabled: cfg.ClientTLSInfo.ClientCertAuth, 173 AuthToken: cfg.AuthToken, 174 TokenTTL: cfg.AuthTokenTTL, 175 InitialCorruptCheck: cfg.ExperimentalInitialCorruptCheck, 176 CorruptCheckTime: cfg.ExperimentalCorruptCheckTime, 177 Debug: cfg.Debug, 178 } 179 180 if e.Server, err = etcdserver.NewServer(srvcfg); err != nil { 181 return e, err 182 } 183 184 // buffer channel so goroutines on closed connections won't wait forever 185 e.errc = make(chan error, len(e.Peers)+len(e.Clients)+2*len(e.sctxs)) 186 187 // newly started member ("memberInitialized==false") 188 // does not need corruption check 189 if memberInitialized { 190 if err = e.Server.CheckInitialHashKV(); err != nil { 191 // set "EtcdServer" to nil, so that it does not block on "EtcdServer.Close()" 192 // (nothing to close since rafthttp transports have not been started) 193 e.Server = nil 194 return e, err 195 } 196 } 197 e.Server.Start() 198 199 if err = e.servePeers(); err != nil { 200 return e, err 201 } 202 if err = e.serveClients(); err != nil { 203 return e, err 204 } 205 if err = e.serveMetrics(); err != nil { 206 return e, err 207 } 208 209 serving = true 210 return e, nil 211 } 212 213 // Config returns the current configuration. 214 func (e *Etcd) Config() Config { 215 return e.cfg 216 } 217 218 // Close gracefully shuts down all servers/listeners. 219 // Client requests will be terminated with request timeout. 220 // After timeout, enforce remaning requests be closed immediately. 221 func (e *Etcd) Close() { 222 e.closeOnce.Do(func() { close(e.stopc) }) 223 224 // close client requests with request timeout 225 timeout := 2 * time.Second 226 if e.Server != nil { 227 timeout = e.Server.Cfg.ReqTimeout() 228 } 229 for _, sctx := range e.sctxs { 230 for ss := range sctx.serversC { 231 ctx, cancel := context.WithTimeout(context.Background(), timeout) 232 stopServers(ctx, ss) 233 cancel() 234 } 235 } 236 237 for _, sctx := range e.sctxs { 238 sctx.cancel() 239 } 240 241 for i := range e.Clients { 242 if e.Clients[i] != nil { 243 e.Clients[i].Close() 244 } 245 } 246 247 for i := range e.metricsListeners { 248 e.metricsListeners[i].Close() 249 } 250 251 // close rafthttp transports 252 if e.Server != nil { 253 e.Server.Stop() 254 } 255 256 // close all idle connections in peer handler (wait up to 1-second) 257 for i := range e.Peers { 258 if e.Peers[i] != nil && e.Peers[i].close != nil { 259 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 260 e.Peers[i].close(ctx) 261 cancel() 262 } 263 } 264 } 265 266 func stopServers(ctx context.Context, ss *servers) { 267 shutdownNow := func() { 268 // first, close the http.Server 269 ss.http.Shutdown(ctx) 270 // then close grpc.Server; cancels all active RPCs 271 ss.grpc.Stop() 272 } 273 274 // do not grpc.Server.GracefulStop with TLS enabled etcd server 275 // See https://github.com/grpc/grpc-go/issues/1384#issuecomment-317124531 276 // and https://github.com/coreos/etcd/issues/8916 277 if ss.secure { 278 shutdownNow() 279 return 280 } 281 282 ch := make(chan struct{}) 283 go func() { 284 defer close(ch) 285 // close listeners to stop accepting new connections, 286 // will block on any existing transports 287 ss.grpc.GracefulStop() 288 }() 289 290 // wait until all pending RPCs are finished 291 select { 292 case <-ch: 293 case <-ctx.Done(): 294 // took too long, manually close open transports 295 // e.g. watch streams 296 shutdownNow() 297 298 // concurrent GracefulStop should be interrupted 299 <-ch 300 } 301 } 302 303 func (e *Etcd) Err() <-chan error { return e.errc } 304 305 func startPeerListeners(cfg *Config) (peers []*peerListener, err error) { 306 if err = updateCipherSuites(&cfg.PeerTLSInfo, cfg.CipherSuites); err != nil { 307 return nil, err 308 } 309 if err = cfg.PeerSelfCert(); err != nil { 310 plog.Fatalf("could not get certs (%v)", err) 311 } 312 if !cfg.PeerTLSInfo.Empty() { 313 plog.Infof("peerTLS: %s", cfg.PeerTLSInfo) 314 } 315 316 peers = make([]*peerListener, len(cfg.LPUrls)) 317 defer func() { 318 if err == nil { 319 return 320 } 321 for i := range peers { 322 if peers[i] != nil && peers[i].close != nil { 323 plog.Info("stopping listening for peers on ", cfg.LPUrls[i].String()) 324 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 325 peers[i].close(ctx) 326 cancel() 327 } 328 } 329 }() 330 331 for i, u := range cfg.LPUrls { 332 if u.Scheme == "http" { 333 if !cfg.PeerTLSInfo.Empty() { 334 plog.Warningf("The scheme of peer url %s is HTTP while peer key/cert files are presented. Ignored peer key/cert files.", u.String()) 335 } 336 if cfg.PeerTLSInfo.ClientCertAuth { 337 plog.Warningf("The scheme of peer url %s is HTTP while client cert auth (--peer-client-cert-auth) is enabled. Ignored client cert auth for this url.", u.String()) 338 } 339 } 340 peers[i] = &peerListener{close: func(context.Context) error { return nil }} 341 peers[i].Listener, err = rafthttp.NewListener(u, &cfg.PeerTLSInfo) 342 if err != nil { 343 return nil, err 344 } 345 // once serve, overwrite with 'http.Server.Shutdown' 346 peers[i].close = func(context.Context) error { 347 return peers[i].Listener.Close() 348 } 349 plog.Info("listening for peers on ", u.String()) 350 } 351 return peers, nil 352 } 353 354 // configure peer handlers after rafthttp.Transport started 355 func (e *Etcd) servePeers() (err error) { 356 ph := etcdhttp.NewPeerHandler(e.Server) 357 var peerTLScfg *tls.Config 358 if !e.cfg.PeerTLSInfo.Empty() { 359 if peerTLScfg, err = e.cfg.PeerTLSInfo.ServerConfig(); err != nil { 360 return err 361 } 362 } 363 364 for _, p := range e.Peers { 365 gs := v3rpc.Server(e.Server, peerTLScfg) 366 m := cmux.New(p.Listener) 367 go gs.Serve(m.Match(cmux.HTTP2())) 368 srv := &http.Server{ 369 Handler: grpcHandlerFunc(gs, ph), 370 ReadTimeout: 5 * time.Minute, 371 ErrorLog: defaultLog.New(ioutil.Discard, "", 0), // do not log user error 372 } 373 go srv.Serve(m.Match(cmux.Any())) 374 p.serve = func() error { return m.Serve() } 375 p.close = func(ctx context.Context) error { 376 // gracefully shutdown http.Server 377 // close open listeners, idle connections 378 // until context cancel or time-out 379 stopServers(ctx, &servers{secure: peerTLScfg != nil, grpc: gs, http: srv}) 380 return nil 381 } 382 } 383 384 // start peer servers in a goroutine 385 for _, pl := range e.Peers { 386 go func(l *peerListener) { 387 e.errHandler(l.serve()) 388 }(pl) 389 } 390 return nil 391 } 392 393 func startClientListeners(cfg *Config) (sctxs map[string]*serveCtx, err error) { 394 if err = updateCipherSuites(&cfg.ClientTLSInfo, cfg.CipherSuites); err != nil { 395 return nil, err 396 } 397 if err = cfg.ClientSelfCert(); err != nil { 398 plog.Fatalf("could not get certs (%v)", err) 399 } 400 if cfg.EnablePprof { 401 plog.Infof("pprof is enabled under %s", debugutil.HTTPPrefixPProf) 402 } 403 404 sctxs = make(map[string]*serveCtx) 405 for _, u := range cfg.LCUrls { 406 sctx := newServeCtx() 407 408 if u.Scheme == "http" || u.Scheme == "unix" { 409 if !cfg.ClientTLSInfo.Empty() { 410 plog.Warningf("The scheme of client url %s is HTTP while peer key/cert files are presented. Ignored key/cert files.", u.String()) 411 } 412 if cfg.ClientTLSInfo.ClientCertAuth { 413 plog.Warningf("The scheme of client url %s is HTTP while client cert auth (--client-cert-auth) is enabled. Ignored client cert auth for this url.", u.String()) 414 } 415 } 416 if (u.Scheme == "https" || u.Scheme == "unixs") && cfg.ClientTLSInfo.Empty() { 417 return nil, fmt.Errorf("TLS key/cert (--cert-file, --key-file) must be provided for client url %s with HTTPs scheme", u.String()) 418 } 419 420 proto := "tcp" 421 addr := u.Host 422 if u.Scheme == "unix" || u.Scheme == "unixs" { 423 proto = "unix" 424 addr = u.Host + u.Path 425 } 426 427 sctx.secure = u.Scheme == "https" || u.Scheme == "unixs" 428 sctx.insecure = !sctx.secure 429 if oldctx := sctxs[addr]; oldctx != nil { 430 oldctx.secure = oldctx.secure || sctx.secure 431 oldctx.insecure = oldctx.insecure || sctx.insecure 432 continue 433 } 434 435 if sctx.l, err = net.Listen(proto, addr); err != nil { 436 return nil, err 437 } 438 // net.Listener will rewrite ipv4 0.0.0.0 to ipv6 [::], breaking 439 // hosts that disable ipv6. So, use the address given by the user. 440 sctx.addr = addr 441 442 if fdLimit, fderr := runtimeutil.FDLimit(); fderr == nil { 443 if fdLimit <= reservedInternalFDNum { 444 plog.Fatalf("file descriptor limit[%d] of etcd process is too low, and should be set higher than %d to ensure internal usage", fdLimit, reservedInternalFDNum) 445 } 446 sctx.l = transport.LimitListener(sctx.l, int(fdLimit-reservedInternalFDNum)) 447 } 448 449 if proto == "tcp" { 450 if sctx.l, err = transport.NewKeepAliveListener(sctx.l, "tcp", nil); err != nil { 451 return nil, err 452 } 453 } 454 455 plog.Info("listening for client requests on ", u.Host) 456 defer func() { 457 if err != nil { 458 sctx.l.Close() 459 plog.Info("stopping listening for client requests on ", u.Host) 460 } 461 }() 462 for k := range cfg.UserHandlers { 463 sctx.userHandlers[k] = cfg.UserHandlers[k] 464 } 465 sctx.serviceRegister = cfg.ServiceRegister 466 if cfg.EnablePprof || cfg.Debug { 467 sctx.registerPprof() 468 } 469 if cfg.Debug { 470 sctx.registerTrace() 471 } 472 sctxs[addr] = sctx 473 } 474 return sctxs, nil 475 } 476 477 func (e *Etcd) serveClients() (err error) { 478 if !e.cfg.ClientTLSInfo.Empty() { 479 plog.Infof("ClientTLS: %s", e.cfg.ClientTLSInfo) 480 } 481 482 if e.cfg.CorsInfo.String() != "" { 483 plog.Infof("cors = %s", e.cfg.CorsInfo) 484 } 485 486 // Start a client server goroutine for each listen address 487 var h http.Handler 488 if e.Config().EnableV2 { 489 if len(e.Config().ExperimentalEnableV2V3) > 0 { 490 srv := v2v3.NewServer(v3client.New(e.Server), e.cfg.ExperimentalEnableV2V3) 491 h = v2http.NewClientHandler(srv, e.Server.Cfg.ReqTimeout()) 492 } else { 493 h = v2http.NewClientHandler(e.Server, e.Server.Cfg.ReqTimeout()) 494 } 495 } else { 496 mux := http.NewServeMux() 497 etcdhttp.HandleBasic(mux, e.Server) 498 h = mux 499 } 500 h = http.Handler(&cors.CORSHandler{Handler: h, Info: e.cfg.CorsInfo}) 501 502 gopts := []grpc.ServerOption{} 503 if e.cfg.GRPCKeepAliveMinTime > time.Duration(0) { 504 gopts = append(gopts, grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ 505 MinTime: e.cfg.GRPCKeepAliveMinTime, 506 PermitWithoutStream: false, 507 })) 508 } 509 if e.cfg.GRPCKeepAliveInterval > time.Duration(0) && 510 e.cfg.GRPCKeepAliveTimeout > time.Duration(0) { 511 gopts = append(gopts, grpc.KeepaliveParams(keepalive.ServerParameters{ 512 Time: e.cfg.GRPCKeepAliveInterval, 513 Timeout: e.cfg.GRPCKeepAliveTimeout, 514 })) 515 } 516 517 // start client servers in a goroutine 518 for _, sctx := range e.sctxs { 519 go func(s *serveCtx) { 520 e.errHandler(s.serve(e.Server, &e.cfg.ClientTLSInfo, h, e.errHandler, gopts...)) 521 }(sctx) 522 } 523 return nil 524 } 525 526 func (e *Etcd) serveMetrics() (err error) { 527 if e.cfg.Metrics == "extensive" { 528 grpc_prometheus.EnableHandlingTimeHistogram() 529 } 530 531 if len(e.cfg.ListenMetricsUrls) > 0 { 532 metricsMux := http.NewServeMux() 533 etcdhttp.HandleMetricsHealth(metricsMux, e.Server) 534 535 for _, murl := range e.cfg.ListenMetricsUrls { 536 tlsInfo := &e.cfg.ClientTLSInfo 537 if murl.Scheme == "http" { 538 tlsInfo = nil 539 } 540 ml, err := transport.NewListener(murl.Host, murl.Scheme, tlsInfo) 541 if err != nil { 542 return err 543 } 544 e.metricsListeners = append(e.metricsListeners, ml) 545 go func(u url.URL, ln net.Listener) { 546 plog.Info("listening for metrics on ", u.String()) 547 e.errHandler(http.Serve(ln, metricsMux)) 548 }(murl, ml) 549 } 550 } 551 return nil 552 } 553 554 func (e *Etcd) errHandler(err error) { 555 select { 556 case <-e.stopc: 557 return 558 default: 559 } 560 select { 561 case <-e.stopc: 562 case e.errc <- err: 563 } 564 } 565 566 func parseCompactionRetention(mode, retention string) (ret time.Duration, err error) { 567 h, err := strconv.Atoi(retention) 568 if err == nil && h >= 0 { 569 switch mode { 570 case compactor.ModeRevision: 571 ret = time.Duration(int64(h)) 572 case compactor.ModePeriodic: 573 ret = time.Duration(int64(h)) * time.Hour 574 } 575 } else { 576 // periodic compaction 577 ret, err = time.ParseDuration(retention) 578 if err != nil { 579 return 0, fmt.Errorf("error parsing CompactionRetention: %v", err) 580 } 581 } 582 return ret, nil 583 }