// Source: github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/client.go

package consul

import (
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/consul/agent/pool"
	"github.com/hashicorp/consul/agent/router"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/consul/tlsutil"
	"github.com/hashicorp/serf/serf"
	"golang.org/x/time/rate"
)

const (
	// clientRPCConnMaxIdle controls how long we keep an idle connection
	// open to a server. 127s was chosen as the first prime above 120s
	// (arbitrarily chose to use a prime) with the intent of reusing
	// connections that are used by once-a-minute cron(8) jobs *and* that
	// use a 60s jitter window (e.g. in vixie cron, job execution can
	// drift by up to 59s per job, or 119s for a once-a-minute cron job).
	clientRPCConnMaxIdle = 127 * time.Second

	// clientMaxStreams controls how many idle streams we keep
	// open to a server.
	clientMaxStreams = 32

	// serfEventBacklog is the maximum number of unprocessed Serf events
	// that will be held in the queue before new Serf events block. A
	// blocking Serf event queue is a bad thing.
	serfEventBacklog = 256

	// serfEventBacklogWarning is the threshold at which log warnings
	// will be emitted indicating a problem when processing Serf events.
	serfEventBacklogWarning = 200
)

// Client is a Consul client which uses RPC to communicate with the
// services for service discovery, health checking, and DC forwarding.
49 type Client struct { 50 config *Config 51 52 // acls is used to resolve tokens to effective policies 53 acls *ACLResolver 54 55 // DEPRECATED (ACL-Legacy-Compat) - Only needed while we support both 56 // useNewACLs is a flag to indicate whether we are using the new ACL system 57 useNewACLs int32 58 59 // Connection pool to consul servers 60 connPool *pool.ConnPool 61 62 // routers is responsible for the selection and maintenance of 63 // Consul servers this agent uses for RPC requests 64 routers *router.Manager 65 66 // rpcLimiter is used to rate limit the total number of RPCs initiated 67 // from an agent. 68 rpcLimiter atomic.Value 69 70 // eventCh is used to receive events from the 71 // serf cluster in the datacenter 72 eventCh chan serf.Event 73 74 // Logger uses the provided LogOutput 75 logger *log.Logger 76 77 // serf is the Serf cluster maintained inside the DC 78 // which contains all the DC nodes 79 serf *serf.Serf 80 81 shutdown bool 82 shutdownCh chan struct{} 83 shutdownLock sync.Mutex 84 85 // embedded struct to hold all the enterprise specific data 86 EnterpriseClient 87 } 88 89 // NewClient is used to construct a new Consul client from the configuration, 90 // potentially returning an error. 91 // NewClient only used to help setting up a client for testing. Normal code 92 // exercises NewClientLogger. 93 func NewClient(config *Config) (*Client, error) { 94 c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil) 95 if err != nil { 96 return nil, err 97 } 98 return NewClientLogger(config, nil, c) 99 } 100 101 func NewClientLogger(config *Config, logger *log.Logger, tlsConfigurator *tlsutil.Configurator) (*Client, error) { 102 // Check the protocol version 103 if err := config.CheckProtocolVersion(); err != nil { 104 return nil, err 105 } 106 107 // Check for a data directory! 
108 if config.DataDir == "" { 109 return nil, fmt.Errorf("Config must provide a DataDir") 110 } 111 112 // Sanity check the ACLs 113 if err := config.CheckACL(); err != nil { 114 return nil, err 115 } 116 117 // Ensure we have a log output 118 if config.LogOutput == nil { 119 config.LogOutput = os.Stderr 120 } 121 122 // Create a logger 123 if logger == nil { 124 logger = log.New(config.LogOutput, "", log.LstdFlags) 125 } 126 127 connPool := &pool.ConnPool{ 128 SrcAddr: config.RPCSrcAddr, 129 LogOutput: config.LogOutput, 130 MaxTime: clientRPCConnMaxIdle, 131 MaxStreams: clientMaxStreams, 132 TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(), 133 ForceTLS: config.VerifyOutgoing, 134 } 135 136 // Create client 137 c := &Client{ 138 config: config, 139 connPool: connPool, 140 eventCh: make(chan serf.Event, serfEventBacklog), 141 logger: logger, 142 shutdownCh: make(chan struct{}), 143 } 144 145 c.rpcLimiter.Store(rate.NewLimiter(config.RPCRate, config.RPCMaxBurst)) 146 147 if err := c.initEnterprise(); err != nil { 148 c.Shutdown() 149 return nil, err 150 } 151 152 c.useNewACLs = 0 153 aclConfig := ACLResolverConfig{ 154 Config: config, 155 Delegate: c, 156 Logger: logger, 157 AutoDisable: true, 158 CacheConfig: clientACLCacheConfig, 159 Sentinel: nil, 160 } 161 var err error 162 if c.acls, err = NewACLResolver(&aclConfig); err != nil { 163 c.Shutdown() 164 return nil, fmt.Errorf("Failed to create ACL resolver: %v", err) 165 } 166 167 // Initialize the LAN Serf 168 c.serf, err = c.setupSerf(config.SerfLANConfig, 169 c.eventCh, serfLANSnapshot) 170 if err != nil { 171 c.Shutdown() 172 return nil, fmt.Errorf("Failed to start lan serf: %v", err) 173 } 174 175 if c.acls.ACLsEnabled() { 176 go c.monitorACLMode() 177 } 178 179 // Start maintenance task for servers 180 c.routers = router.New(c.logger, c.shutdownCh, c.serf, c.connPool) 181 go c.routers.Start() 182 183 // Start LAN event handlers after the router is complete since the event 184 // handlers depend on the router 
and the router depends on Serf. 185 go c.lanEventHandler() 186 187 if err := c.startEnterprise(); err != nil { 188 c.Shutdown() 189 return nil, err 190 } 191 192 return c, nil 193 } 194 195 // Shutdown is used to shutdown the client 196 func (c *Client) Shutdown() error { 197 c.logger.Printf("[INFO] consul: shutting down client") 198 c.shutdownLock.Lock() 199 defer c.shutdownLock.Unlock() 200 201 if c.shutdown { 202 return nil 203 } 204 205 c.shutdown = true 206 close(c.shutdownCh) 207 208 if c.serf != nil { 209 c.serf.Shutdown() 210 } 211 212 // Close the connection pool 213 c.connPool.Shutdown() 214 return nil 215 } 216 217 // Leave is used to prepare for a graceful shutdown 218 func (c *Client) Leave() error { 219 c.logger.Printf("[INFO] consul: client starting leave") 220 221 // Leave the LAN pool 222 if c.serf != nil { 223 if err := c.serf.Leave(); err != nil { 224 c.logger.Printf("[ERR] consul: Failed to leave LAN Serf cluster: %v", err) 225 } 226 } 227 return nil 228 } 229 230 // JoinLAN is used to have Consul client join the inner-DC pool 231 // The target address should be another node inside the DC 232 // listening on the Serf LAN address 233 func (c *Client) JoinLAN(addrs []string) (int, error) { 234 return c.serf.Join(addrs, true) 235 } 236 237 // LocalMember is used to return the local node 238 func (c *Client) LocalMember() serf.Member { 239 return c.serf.LocalMember() 240 } 241 242 // LANMembers is used to return the members of the LAN cluster 243 func (c *Client) LANMembers() []serf.Member { 244 return c.serf.Members() 245 } 246 247 // LANMembersAllSegments returns members from all segments. 248 func (c *Client) LANMembersAllSegments() ([]serf.Member, error) { 249 return c.serf.Members(), nil 250 } 251 252 // LANSegmentMembers only returns our own segment's members, because clients 253 // can't be in multiple segments. 
254 func (c *Client) LANSegmentMembers(segment string) ([]serf.Member, error) { 255 if segment == c.config.Segment { 256 return c.LANMembers(), nil 257 } 258 259 return nil, fmt.Errorf("segment %q not found", segment) 260 } 261 262 // RemoveFailedNode is used to remove a failed node from the cluster 263 func (c *Client) RemoveFailedNode(node string) error { 264 return c.serf.RemoveFailedNode(node) 265 } 266 267 // KeyManagerLAN returns the LAN Serf keyring manager 268 func (c *Client) KeyManagerLAN() *serf.KeyManager { 269 return c.serf.KeyManager() 270 } 271 272 // Encrypted determines if gossip is encrypted 273 func (c *Client) Encrypted() bool { 274 return c.serf.EncryptionEnabled() 275 } 276 277 // RPC is used to forward an RPC call to a consul server, or fail if no servers 278 func (c *Client) RPC(method string, args interface{}, reply interface{}) error { 279 // This is subtle but we start measuring the time on the client side 280 // right at the time of the first request, vs. on the first retry as 281 // is done on the server side inside forward(). This is because the 282 // servers may already be applying the RPCHoldTimeout up there, so by 283 // starting the timer here we won't potentially double up the delay. 284 // TODO (slackpad) Plumb a deadline here with a context. 285 firstCheck := time.Now() 286 287 TRY: 288 server := c.routers.FindServer() 289 if server == nil { 290 return structs.ErrNoServers 291 } 292 293 // Enforce the RPC limit. 294 metrics.IncrCounter([]string{"client", "rpc"}, 1) 295 if !c.rpcLimiter.Load().(*rate.Limiter).Allow() { 296 metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1) 297 return structs.ErrRPCRateExceeded 298 } 299 300 // Make the request. 301 rpcErr := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, server.UseTLS, args, reply) 302 if rpcErr == nil { 303 return nil 304 } 305 306 // Move off to another server, and see if we can retry. 
307 c.logger.Printf("[ERR] consul: %q RPC failed to server %s: %v", method, server.Addr, rpcErr) 308 metrics.IncrCounterWithLabels([]string{"client", "rpc", "failed"}, 1, []metrics.Label{{Name: "server", Value: server.Name}}) 309 c.routers.NotifyFailedServer(server) 310 if retry := canRetry(args, rpcErr); !retry { 311 return rpcErr 312 } 313 314 // We can wait a bit and retry! 315 if time.Since(firstCheck) < c.config.RPCHoldTimeout { 316 jitter := lib.RandomStagger(c.config.RPCHoldTimeout / jitterFraction) 317 select { 318 case <-time.After(jitter): 319 goto TRY 320 case <-c.shutdownCh: 321 } 322 } 323 return rpcErr 324 } 325 326 // SnapshotRPC sends the snapshot request to one of the servers, reading from 327 // the streaming input and writing to the streaming output depending on the 328 // operation. 329 func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer, 330 replyFn structs.SnapshotReplyFn) error { 331 server := c.routers.FindServer() 332 if server == nil { 333 return structs.ErrNoServers 334 } 335 336 // Enforce the RPC limit. 337 metrics.IncrCounter([]string{"client", "rpc"}, 1) 338 if !c.rpcLimiter.Load().(*rate.Limiter).Allow() { 339 metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1) 340 return structs.ErrRPCRateExceeded 341 } 342 343 // Request the operation. 344 var reply structs.SnapshotResponse 345 snap, err := SnapshotRPC(c.connPool, c.config.Datacenter, server.Addr, server.UseTLS, args, in, &reply) 346 if err != nil { 347 return err 348 } 349 defer func() { 350 if err := snap.Close(); err != nil { 351 c.logger.Printf("[WARN] consul: Failed closing snapshot stream: %v", err) 352 } 353 }() 354 355 // Let the caller peek at the reply. 356 if replyFn != nil { 357 if err := replyFn(&reply); err != nil { 358 return nil 359 } 360 } 361 362 // Stream the snapshot. 
363 if out != nil { 364 if _, err := io.Copy(out, snap); err != nil { 365 return fmt.Errorf("failed to stream snapshot: %v", err) 366 } 367 } 368 369 return nil 370 } 371 372 // Stats is used to return statistics for debugging and insight 373 // for various sub-systems 374 func (c *Client) Stats() map[string]map[string]string { 375 numServers := c.routers.NumServers() 376 377 toString := func(v uint64) string { 378 return strconv.FormatUint(v, 10) 379 } 380 stats := map[string]map[string]string{ 381 "consul": map[string]string{ 382 "server": "false", 383 "known_servers": toString(uint64(numServers)), 384 }, 385 "serf_lan": c.serf.Stats(), 386 "runtime": runtimeStats(), 387 } 388 389 if c.ACLsEnabled() { 390 if c.UseLegacyACLs() { 391 stats["consul"]["acl"] = "legacy" 392 } else { 393 stats["consul"]["acl"] = "enabled" 394 } 395 } else { 396 stats["consul"]["acl"] = "disabled" 397 } 398 399 for outerKey, outerValue := range c.enterpriseStats() { 400 if _, ok := stats[outerKey]; ok { 401 for innerKey, innerValue := range outerValue { 402 stats[outerKey][innerKey] = innerValue 403 } 404 } else { 405 stats[outerKey] = outerValue 406 } 407 } 408 409 return stats 410 } 411 412 // GetLANCoordinate returns the network coordinate of the current node, as 413 // maintained by Serf. 414 func (c *Client) GetLANCoordinate() (lib.CoordinateSet, error) { 415 lan, err := c.serf.GetCoordinate() 416 if err != nil { 417 return nil, err 418 } 419 420 cs := lib.CoordinateSet{c.config.Segment: lan} 421 return cs, nil 422 } 423 424 // ReloadConfig is used to have the Client do an online reload of 425 // relevant configuration information 426 func (c *Client) ReloadConfig(config *Config) error { 427 c.rpcLimiter.Store(rate.NewLimiter(config.RPCRate, config.RPCMaxBurst)) 428 return nil 429 }