github.com/Iqoqo/consul@v1.4.5/agent/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"log"
     7  	"os"
     8  	"strconv"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/armon/go-metrics"
    14  	"github.com/hashicorp/consul/agent/pool"
    15  	"github.com/hashicorp/consul/agent/router"
    16  	"github.com/hashicorp/consul/agent/structs"
    17  	"github.com/hashicorp/consul/lib"
    18  	"github.com/hashicorp/consul/tlsutil"
    19  	"github.com/hashicorp/serf/serf"
    20  	"golang.org/x/time/rate"
    21  )
    22  
const (
	// clientRPCConnMaxIdle controls how long we keep an idle connection
	// open to a server; it is passed to the connection pool as its MaxTime.
	// 127s was chosen as the first prime above 120s (arbitrarily chose to
	// use a prime) with the intent of reusing connections that are used by
	// once-a-minute cron(8) jobs *and* that use a 60s jitter window (e.g.
	// in vixie cron, job execution can drift by up to 59s per job, or 119s
	// for a once-a-minute cron job).
	clientRPCConnMaxIdle = 127 * time.Second

	// clientMaxStreams controls how many idle streams we keep
	// open to a server; it is passed to the connection pool as MaxStreams.
	clientMaxStreams = 32

	// serfEventBacklog is the maximum number of unprocessed Serf events
	// that will be held in the queue before new Serf events block (it is
	// the buffer size of the client's event channel).  A blocking Serf
	// event queue is a bad thing.
	serfEventBacklog = 256

	// serfEventBacklogWarning is the threshold at which log warnings
	// will be emitted indicating a problem when processing Serf events.
	serfEventBacklogWarning = 200
)
    46  
// Client is a Consul client which uses RPC to communicate with the
// servers for service discovery, health checking, and DC forwarding.
type Client struct {
	// config is the configuration this client was constructed with.
	config *Config

	// acls is used to resolve tokens to effective policies
	acls *ACLResolver

	// useNewACLs is a flag to indicate whether we are using the new ACL system.
	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both
	// the legacy and the new ACL systems.
	useNewACLs int32

	// Connection pool to consul servers
	connPool *pool.ConnPool

	// routers is responsible for the selection and maintenance of
	// Consul servers this agent uses for RPC requests
	routers *router.Manager

	// rpcLimiter is used to rate limit the total number of RPCs initiated
	// from an agent. It holds a *rate.Limiter and is swapped atomically so
	// that a config reload can replace the limiter while RPCs are in flight.
	rpcLimiter atomic.Value

	// eventCh is used to receive events from the
	// serf cluster in the datacenter
	eventCh chan serf.Event

	// Logger uses the provided LogOutput
	logger *log.Logger

	// serf is the Serf cluster maintained inside the DC
	// which contains all the DC nodes
	serf *serf.Serf

	// shutdown records that Shutdown has already run; shutdownCh is closed
	// on shutdown to signal background work; shutdownLock guards shutdown.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// embedded struct to hold all the enterprise specific data
	EnterpriseClient
}
    88  
    89  // NewClient is used to construct a new Consul client from the configuration,
    90  // potentially returning an error.
    91  // NewClient only used to help setting up a client for testing. Normal code
    92  // exercises NewClientLogger.
    93  func NewClient(config *Config) (*Client, error) {
    94  	c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	return NewClientLogger(config, nil, c)
    99  }
   100  
   101  func NewClientLogger(config *Config, logger *log.Logger, tlsConfigurator *tlsutil.Configurator) (*Client, error) {
   102  	// Check the protocol version
   103  	if err := config.CheckProtocolVersion(); err != nil {
   104  		return nil, err
   105  	}
   106  
   107  	// Check for a data directory!
   108  	if config.DataDir == "" {
   109  		return nil, fmt.Errorf("Config must provide a DataDir")
   110  	}
   111  
   112  	// Sanity check the ACLs
   113  	if err := config.CheckACL(); err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	// Ensure we have a log output
   118  	if config.LogOutput == nil {
   119  		config.LogOutput = os.Stderr
   120  	}
   121  
   122  	// Create a logger
   123  	if logger == nil {
   124  		logger = log.New(config.LogOutput, "", log.LstdFlags)
   125  	}
   126  
   127  	connPool := &pool.ConnPool{
   128  		SrcAddr:    config.RPCSrcAddr,
   129  		LogOutput:  config.LogOutput,
   130  		MaxTime:    clientRPCConnMaxIdle,
   131  		MaxStreams: clientMaxStreams,
   132  		TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(),
   133  		ForceTLS:   config.VerifyOutgoing,
   134  	}
   135  
   136  	// Create client
   137  	c := &Client{
   138  		config:     config,
   139  		connPool:   connPool,
   140  		eventCh:    make(chan serf.Event, serfEventBacklog),
   141  		logger:     logger,
   142  		shutdownCh: make(chan struct{}),
   143  	}
   144  
   145  	c.rpcLimiter.Store(rate.NewLimiter(config.RPCRate, config.RPCMaxBurst))
   146  
   147  	if err := c.initEnterprise(); err != nil {
   148  		c.Shutdown()
   149  		return nil, err
   150  	}
   151  
   152  	c.useNewACLs = 0
   153  	aclConfig := ACLResolverConfig{
   154  		Config:      config,
   155  		Delegate:    c,
   156  		Logger:      logger,
   157  		AutoDisable: true,
   158  		CacheConfig: clientACLCacheConfig,
   159  		Sentinel:    nil,
   160  	}
   161  	var err error
   162  	if c.acls, err = NewACLResolver(&aclConfig); err != nil {
   163  		c.Shutdown()
   164  		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
   165  	}
   166  
   167  	// Initialize the LAN Serf
   168  	c.serf, err = c.setupSerf(config.SerfLANConfig,
   169  		c.eventCh, serfLANSnapshot)
   170  	if err != nil {
   171  		c.Shutdown()
   172  		return nil, fmt.Errorf("Failed to start lan serf: %v", err)
   173  	}
   174  
   175  	if c.acls.ACLsEnabled() {
   176  		go c.monitorACLMode()
   177  	}
   178  
   179  	// Start maintenance task for servers
   180  	c.routers = router.New(c.logger, c.shutdownCh, c.serf, c.connPool)
   181  	go c.routers.Start()
   182  
   183  	// Start LAN event handlers after the router is complete since the event
   184  	// handlers depend on the router and the router depends on Serf.
   185  	go c.lanEventHandler()
   186  
   187  	if err := c.startEnterprise(); err != nil {
   188  		c.Shutdown()
   189  		return nil, err
   190  	}
   191  
   192  	return c, nil
   193  }
   194  
   195  // Shutdown is used to shutdown the client
   196  func (c *Client) Shutdown() error {
   197  	c.logger.Printf("[INFO] consul: shutting down client")
   198  	c.shutdownLock.Lock()
   199  	defer c.shutdownLock.Unlock()
   200  
   201  	if c.shutdown {
   202  		return nil
   203  	}
   204  
   205  	c.shutdown = true
   206  	close(c.shutdownCh)
   207  
   208  	if c.serf != nil {
   209  		c.serf.Shutdown()
   210  	}
   211  
   212  	// Close the connection pool
   213  	c.connPool.Shutdown()
   214  	return nil
   215  }
   216  
   217  // Leave is used to prepare for a graceful shutdown
   218  func (c *Client) Leave() error {
   219  	c.logger.Printf("[INFO] consul: client starting leave")
   220  
   221  	// Leave the LAN pool
   222  	if c.serf != nil {
   223  		if err := c.serf.Leave(); err != nil {
   224  			c.logger.Printf("[ERR] consul: Failed to leave LAN Serf cluster: %v", err)
   225  		}
   226  	}
   227  	return nil
   228  }
   229  
   230  // JoinLAN is used to have Consul client join the inner-DC pool
   231  // The target address should be another node inside the DC
   232  // listening on the Serf LAN address
   233  func (c *Client) JoinLAN(addrs []string) (int, error) {
   234  	return c.serf.Join(addrs, true)
   235  }
   236  
   237  // LocalMember is used to return the local node
   238  func (c *Client) LocalMember() serf.Member {
   239  	return c.serf.LocalMember()
   240  }
   241  
   242  // LANMembers is used to return the members of the LAN cluster
   243  func (c *Client) LANMembers() []serf.Member {
   244  	return c.serf.Members()
   245  }
   246  
   247  // LANMembersAllSegments returns members from all segments.
   248  func (c *Client) LANMembersAllSegments() ([]serf.Member, error) {
   249  	return c.serf.Members(), nil
   250  }
   251  
   252  // LANSegmentMembers only returns our own segment's members, because clients
   253  // can't be in multiple segments.
   254  func (c *Client) LANSegmentMembers(segment string) ([]serf.Member, error) {
   255  	if segment == c.config.Segment {
   256  		return c.LANMembers(), nil
   257  	}
   258  
   259  	return nil, fmt.Errorf("segment %q not found", segment)
   260  }
   261  
   262  // RemoveFailedNode is used to remove a failed node from the cluster
   263  func (c *Client) RemoveFailedNode(node string) error {
   264  	return c.serf.RemoveFailedNode(node)
   265  }
   266  
   267  // KeyManagerLAN returns the LAN Serf keyring manager
   268  func (c *Client) KeyManagerLAN() *serf.KeyManager {
   269  	return c.serf.KeyManager()
   270  }
   271  
   272  // Encrypted determines if gossip is encrypted
   273  func (c *Client) Encrypted() bool {
   274  	return c.serf.EncryptionEnabled()
   275  }
   276  
   277  // RPC is used to forward an RPC call to a consul server, or fail if no servers
   278  func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
   279  	// This is subtle but we start measuring the time on the client side
   280  	// right at the time of the first request, vs. on the first retry as
   281  	// is done on the server side inside forward(). This is because the
   282  	// servers may already be applying the RPCHoldTimeout up there, so by
   283  	// starting the timer here we won't potentially double up the delay.
   284  	// TODO (slackpad) Plumb a deadline here with a context.
   285  	firstCheck := time.Now()
   286  
   287  TRY:
   288  	server := c.routers.FindServer()
   289  	if server == nil {
   290  		return structs.ErrNoServers
   291  	}
   292  
   293  	// Enforce the RPC limit.
   294  	metrics.IncrCounter([]string{"client", "rpc"}, 1)
   295  	if !c.rpcLimiter.Load().(*rate.Limiter).Allow() {
   296  		metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1)
   297  		return structs.ErrRPCRateExceeded
   298  	}
   299  
   300  	// Make the request.
   301  	rpcErr := c.connPool.RPC(c.config.Datacenter, server.Addr, server.Version, method, server.UseTLS, args, reply)
   302  	if rpcErr == nil {
   303  		return nil
   304  	}
   305  
   306  	// Move off to another server, and see if we can retry.
   307  	c.logger.Printf("[ERR] consul: %q RPC failed to server %s: %v", method, server.Addr, rpcErr)
   308  	metrics.IncrCounterWithLabels([]string{"client", "rpc", "failed"}, 1, []metrics.Label{{Name: "server", Value: server.Name}})
   309  	c.routers.NotifyFailedServer(server)
   310  	if retry := canRetry(args, rpcErr); !retry {
   311  		return rpcErr
   312  	}
   313  
   314  	// We can wait a bit and retry!
   315  	if time.Since(firstCheck) < c.config.RPCHoldTimeout {
   316  		jitter := lib.RandomStagger(c.config.RPCHoldTimeout / jitterFraction)
   317  		select {
   318  		case <-time.After(jitter):
   319  			goto TRY
   320  		case <-c.shutdownCh:
   321  		}
   322  	}
   323  	return rpcErr
   324  }
   325  
   326  // SnapshotRPC sends the snapshot request to one of the servers, reading from
   327  // the streaming input and writing to the streaming output depending on the
   328  // operation.
   329  func (c *Client) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
   330  	replyFn structs.SnapshotReplyFn) error {
   331  	server := c.routers.FindServer()
   332  	if server == nil {
   333  		return structs.ErrNoServers
   334  	}
   335  
   336  	// Enforce the RPC limit.
   337  	metrics.IncrCounter([]string{"client", "rpc"}, 1)
   338  	if !c.rpcLimiter.Load().(*rate.Limiter).Allow() {
   339  		metrics.IncrCounter([]string{"client", "rpc", "exceeded"}, 1)
   340  		return structs.ErrRPCRateExceeded
   341  	}
   342  
   343  	// Request the operation.
   344  	var reply structs.SnapshotResponse
   345  	snap, err := SnapshotRPC(c.connPool, c.config.Datacenter, server.Addr, server.UseTLS, args, in, &reply)
   346  	if err != nil {
   347  		return err
   348  	}
   349  	defer func() {
   350  		if err := snap.Close(); err != nil {
   351  			c.logger.Printf("[WARN] consul: Failed closing snapshot stream: %v", err)
   352  		}
   353  	}()
   354  
   355  	// Let the caller peek at the reply.
   356  	if replyFn != nil {
   357  		if err := replyFn(&reply); err != nil {
   358  			return nil
   359  		}
   360  	}
   361  
   362  	// Stream the snapshot.
   363  	if out != nil {
   364  		if _, err := io.Copy(out, snap); err != nil {
   365  			return fmt.Errorf("failed to stream snapshot: %v", err)
   366  		}
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  // Stats is used to return statistics for debugging and insight
   373  // for various sub-systems
   374  func (c *Client) Stats() map[string]map[string]string {
   375  	numServers := c.routers.NumServers()
   376  
   377  	toString := func(v uint64) string {
   378  		return strconv.FormatUint(v, 10)
   379  	}
   380  	stats := map[string]map[string]string{
   381  		"consul": map[string]string{
   382  			"server":        "false",
   383  			"known_servers": toString(uint64(numServers)),
   384  		},
   385  		"serf_lan": c.serf.Stats(),
   386  		"runtime":  runtimeStats(),
   387  	}
   388  
   389  	if c.ACLsEnabled() {
   390  		if c.UseLegacyACLs() {
   391  			stats["consul"]["acl"] = "legacy"
   392  		} else {
   393  			stats["consul"]["acl"] = "enabled"
   394  		}
   395  	} else {
   396  		stats["consul"]["acl"] = "disabled"
   397  	}
   398  
   399  	for outerKey, outerValue := range c.enterpriseStats() {
   400  		if _, ok := stats[outerKey]; ok {
   401  			for innerKey, innerValue := range outerValue {
   402  				stats[outerKey][innerKey] = innerValue
   403  			}
   404  		} else {
   405  			stats[outerKey] = outerValue
   406  		}
   407  	}
   408  
   409  	return stats
   410  }
   411  
   412  // GetLANCoordinate returns the network coordinate of the current node, as
   413  // maintained by Serf.
   414  func (c *Client) GetLANCoordinate() (lib.CoordinateSet, error) {
   415  	lan, err := c.serf.GetCoordinate()
   416  	if err != nil {
   417  		return nil, err
   418  	}
   419  
   420  	cs := lib.CoordinateSet{c.config.Segment: lan}
   421  	return cs, nil
   422  }
   423  
   424  // ReloadConfig is used to have the Client do an online reload of
   425  // relevant configuration information
   426  func (c *Client) ReloadConfig(config *Config) error {
   427  	c.rpcLimiter.Store(rate.NewLimiter(config.RPCRate, config.RPCMaxBurst))
   428  	return nil
   429  }