github.com/thanos-io/thanos@v0.32.5/internal/cortex/chunk/cache/memcached_client.go (about)

     1  // Copyright (c) The Cortex Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package cache
     5  
     6  import (
     7  	"context"
     8  	"flag"
     9  	"fmt"
    10  	"net"
    11  	"sort"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/bradfitz/gomemcache/memcache"
    17  	"github.com/go-kit/log"
    18  	"github.com/go-kit/log/level"
    19  	"github.com/prometheus/client_golang/prometheus"
    20  	"github.com/prometheus/client_golang/prometheus/promauto"
    21  	"github.com/sony/gobreaker"
    22  	"github.com/thanos-io/thanos/pkg/discovery/dns"
    23  )
    24  
// MemcachedClient is the subset of memcache operations used by callers of
// this package. The interface exists for mocking memcacheClient.
type MemcachedClient interface {
	// GetMulti fetches the items for the given keys in one call and returns
	// them in a map keyed by string (presumably the cache key; verify against
	// the implementation in use).
	GetMulti(keys []string) (map[string]*memcache.Item, error)
	// Set stores the given item.
	Set(item *memcache.Item) error
}
    30  
// serverSelector is a memcache.ServerSelector whose server list can be
// replaced at runtime. NewMemcachedClient assigns either a
// MemcachedJumpHashSelector or a memcache.ServerList to it.
type serverSelector interface {
	memcache.ServerSelector
	// SetServers is called with the freshly discovered, sorted server list on
	// every refresh (see updateMemcacheServers).
	SetServers(servers ...string) error
}
    35  
// memcachedClient is a memcache client that gets its server list from DNS
// (an SRV lookup of service/hostname, or the configured addresses resolved
// through the DNS provider), and periodically updates that ServerList.
type memcachedClient struct {
	// Mutex guards cbs.
	sync.Mutex
	// name is used as the "name" label of the metrics and as the prefix of
	// circuit-breaker names.
	name string
	*memcache.Client
	// serverList is the selector the embedded Client was built from; it is
	// updated with the discovered servers on every refresh.
	serverList serverSelector

	// hostname and service are the arguments of the SRV lookup that is used
	// when addresses is empty.
	hostname string
	service  string

	// addresses, when non-empty, are resolved through provider instead of
	// performing the SRV lookup.
	addresses []string
	provider  *dns.Provider

	// cbs holds one circuit-breaker per dialled address; it is created lazily
	// in dialViaCircuitBreaker and pruned in updateMemcacheServers.
	cbs map[ /*address*/ string]*gobreaker.CircuitBreaker
	// cbFailures, cbTimeout and cbInterval are copied from the config and
	// passed to every circuit-breaker that gets created.
	cbFailures uint
	cbTimeout  time.Duration
	cbInterval time.Duration

	// maxItemSize is copied from MemcachedClientConfig.MaxItemSize. It is not
	// read by the code visible in this file section.
	maxItemSize int

	// quit makes updateLoop return; wait tracks the updateLoop goroutine.
	quit chan struct{}
	wait sync.WaitGroup

	// numServers reports the size of the last discovered server list.
	numServers prometheus.Gauge
	// skipped is the cortex_memcache_client_set_skip_total counter. It is only
	// registered here; NOTE(review): no visible code increments it - confirm
	// where oversized sets are counted.
	skipped prometheus.Counter

	logger log.Logger
}
    65  
// MemcachedClientConfig defines how a MemcachedClient should be constructed.
type MemcachedClientConfig struct {
	// Host is the name used for the SRV lookup when Addresses is empty.
	Host string `yaml:"host"`
	// Service is the SRV service name used together with Host.
	Service string `yaml:"service"`
	// Addresses is a comma separated list that, when non-empty, is resolved
	// through the DNS provider instead of the Host/Service SRV lookup.
	Addresses string `yaml:"addresses"` // EXPERIMENTAL.
	// Timeout is applied to the underlying memcache client.
	Timeout time.Duration `yaml:"timeout"`
	// MaxIdleConns is applied to the underlying memcache client.
	MaxIdleConns int `yaml:"max_idle_conns"`
	// MaxItemSize is the largest item that is stored; 0 means no limit
	// (per the flag help text).
	MaxItemSize int `yaml:"max_item_size"`
	// UpdateInterval is the period between refreshes of the server list.
	UpdateInterval time.Duration `yaml:"update_interval"`
	// ConsistentHash selects the jump-hash server selector instead of the
	// plain memcache.ServerList.
	ConsistentHash bool `yaml:"consistent_hash"`
	// CBFailures is the consecutive dial failure threshold of the
	// circuit-breaker; 0 disables the circuit-breaker entirely.
	CBFailures uint          `yaml:"circuit_breaker_consecutive_failures"`
	CBTimeout  time.Duration `yaml:"circuit_breaker_timeout"`  // circuit-breaker remains open for this long after tripping
	CBInterval time.Duration `yaml:"circuit_breaker_interval"` // reset circuit-breaker failure counts after this long
}
    80  
    81  // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet
    82  func (cfg *MemcachedClientConfig) RegisterFlagsWithPrefix(prefix, description string, f *flag.FlagSet) {
    83  	f.StringVar(&cfg.Host, prefix+"memcached.hostname", "", description+"Hostname for memcached service to use. If empty and if addresses is unset, no memcached will be used.")
    84  	f.StringVar(&cfg.Service, prefix+"memcached.service", "memcached", description+"SRV service used to discover memcache servers.")
    85  	f.StringVar(&cfg.Addresses, prefix+"memcached.addresses", "", description+"EXPERIMENTAL: Comma separated addresses list in DNS Service Discovery format: https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery")
    86  	f.IntVar(&cfg.MaxIdleConns, prefix+"memcached.max-idle-conns", 16, description+"Maximum number of idle connections in pool.")
    87  	f.DurationVar(&cfg.Timeout, prefix+"memcached.timeout", 100*time.Millisecond, description+"Maximum time to wait before giving up on memcached requests.")
    88  	f.DurationVar(&cfg.UpdateInterval, prefix+"memcached.update-interval", 1*time.Minute, description+"Period with which to poll DNS for memcache servers.")
    89  	f.BoolVar(&cfg.ConsistentHash, prefix+"memcached.consistent-hash", true, description+"Use consistent hashing to distribute to memcache servers.")
    90  	f.UintVar(&cfg.CBFailures, prefix+"memcached.circuit-breaker-consecutive-failures", 10, description+"Trip circuit-breaker after this number of consecutive dial failures (if zero then circuit-breaker is disabled).")
    91  	f.DurationVar(&cfg.CBTimeout, prefix+"memcached.circuit-breaker-timeout", 10*time.Second, description+"Duration circuit-breaker remains open after tripping (if zero then 60 seconds is used).")
    92  	f.DurationVar(&cfg.CBInterval, prefix+"memcached.circuit-breaker-interval", 10*time.Second, description+"Reset circuit-breaker counts after this long (if zero then never reset).")
    93  	f.IntVar(&cfg.MaxItemSize, prefix+"memcached.max-item-size", 0, description+"The maximum size of an item stored in memcached. Bigger items are not stored. If set to 0, no maximum size is enforced.")
    94  }
    95  
    96  // NewMemcachedClient creates a new MemcacheClient that gets its server list
    97  // from SRV and updates the server list on a regular basis.
    98  func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Registerer, logger log.Logger) MemcachedClient {
    99  	var selector serverSelector
   100  	if cfg.ConsistentHash {
   101  		selector = &MemcachedJumpHashSelector{}
   102  	} else {
   103  		selector = &memcache.ServerList{}
   104  	}
   105  
   106  	client := memcache.NewFromSelector(selector)
   107  	client.Timeout = cfg.Timeout
   108  	client.MaxIdleConns = cfg.MaxIdleConns
   109  
   110  	dnsProviderRegisterer := prometheus.WrapRegistererWithPrefix("cortex_", prometheus.WrapRegistererWith(prometheus.Labels{
   111  		"name": name,
   112  	}, r))
   113  
   114  	newClient := &memcachedClient{
   115  		name:        name,
   116  		Client:      client,
   117  		serverList:  selector,
   118  		hostname:    cfg.Host,
   119  		service:     cfg.Service,
   120  		logger:      logger,
   121  		provider:    dns.NewProvider(logger, dnsProviderRegisterer, dns.GolangResolverType),
   122  		cbs:         make(map[string]*gobreaker.CircuitBreaker),
   123  		cbFailures:  cfg.CBFailures,
   124  		cbInterval:  cfg.CBInterval,
   125  		cbTimeout:   cfg.CBTimeout,
   126  		maxItemSize: cfg.MaxItemSize,
   127  		quit:        make(chan struct{}),
   128  
   129  		numServers: promauto.With(r).NewGauge(prometheus.GaugeOpts{
   130  			Namespace:   "cortex",
   131  			Name:        "memcache_client_servers",
   132  			Help:        "The number of memcache servers discovered.",
   133  			ConstLabels: prometheus.Labels{"name": name},
   134  		}),
   135  
   136  		skipped: promauto.With(r).NewCounter(prometheus.CounterOpts{
   137  			Namespace:   "cortex",
   138  			Name:        "memcache_client_set_skip_total",
   139  			Help:        "Total number of skipped set operations because of the value is larger than the max-item-size.",
   140  			ConstLabels: prometheus.Labels{"name": name},
   141  		}),
   142  	}
   143  	if cfg.CBFailures > 0 {
   144  		newClient.Client.DialTimeout = newClient.dialViaCircuitBreaker
   145  	}
   146  
   147  	if len(cfg.Addresses) > 0 {
   148  		newClient.addresses = strings.Split(cfg.Addresses, ",")
   149  	}
   150  
   151  	err := newClient.updateMemcacheServers()
   152  	if err != nil {
   153  		level.Error(logger).Log("msg", "error setting memcache servers to host", "host", cfg.Host, "err", err)
   154  	}
   155  
   156  	newClient.wait.Add(1)
   157  	go newClient.updateLoop(cfg.UpdateInterval)
   158  	return newClient
   159  }
   160  
   161  func (c *memcachedClient) circuitBreakerStateChange(name string, from gobreaker.State, to gobreaker.State) {
   162  	level.Info(c.logger).Log("msg", "circuit-breaker state change", "name", name, "from-state", from, "to-state", to)
   163  }
   164  
   165  func (c *memcachedClient) dialViaCircuitBreaker(network, address string, timeout time.Duration) (net.Conn, error) {
   166  	c.Lock()
   167  	cb := c.cbs[address]
   168  	if cb == nil {
   169  		cb = gobreaker.NewCircuitBreaker(gobreaker.Settings{
   170  			Name:          c.name + ":" + address,
   171  			Interval:      c.cbInterval,
   172  			Timeout:       c.cbTimeout,
   173  			OnStateChange: c.circuitBreakerStateChange,
   174  			ReadyToTrip: func(counts gobreaker.Counts) bool {
   175  				return uint(counts.ConsecutiveFailures) > c.cbFailures
   176  			},
   177  		})
   178  		c.cbs[address] = cb
   179  	}
   180  	c.Unlock()
   181  
   182  	conn, err := cb.Execute(func() (interface{}, error) {
   183  		return net.DialTimeout(network, address, timeout)
   184  	})
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  	return conn.(net.Conn), nil
   189  }
   190  
   191  func (c *memcachedClient) updateLoop(updateInterval time.Duration) {
   192  	defer c.wait.Done()
   193  	ticker := time.NewTicker(updateInterval)
   194  	for {
   195  		select {
   196  		case <-ticker.C:
   197  			err := c.updateMemcacheServers()
   198  			if err != nil {
   199  				level.Warn(c.logger).Log("msg", "error updating memcache servers", "err", err)
   200  			}
   201  		case <-c.quit:
   202  			ticker.Stop()
   203  			return
   204  		}
   205  	}
   206  }
   207  
   208  // updateMemcacheServers sets a memcache server list from SRV records. SRV
   209  // priority & weight are ignored.
   210  func (c *memcachedClient) updateMemcacheServers() error {
   211  	var servers []string
   212  
   213  	if len(c.addresses) > 0 {
   214  		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   215  		defer cancel()
   216  
   217  		if err := c.provider.Resolve(ctx, c.addresses); err != nil {
   218  			return err
   219  		}
   220  		servers = c.provider.Addresses()
   221  	} else {
   222  		_, addrs, err := net.LookupSRV(c.service, "tcp", c.hostname)
   223  		if err != nil {
   224  			return err
   225  		}
   226  		for _, srv := range addrs {
   227  			servers = append(servers, fmt.Sprintf("%s:%d", srv.Target, srv.Port))
   228  		}
   229  	}
   230  
   231  	if len(servers) > 0 {
   232  		// Copy across circuit-breakers for current set of addresses, thus
   233  		// leaving behind any for servers we won't talk to again
   234  		c.Lock()
   235  		newCBs := make(map[string]*gobreaker.CircuitBreaker, len(servers))
   236  		for _, address := range servers {
   237  			if cb, exists := c.cbs[address]; exists {
   238  				newCBs[address] = cb
   239  			}
   240  		}
   241  		c.cbs = newCBs
   242  		c.Unlock()
   243  	}
   244  
   245  	// ServerList deterministically maps keys to _index_ of the server list.
   246  	// Since DNS returns records in different order each time, we sort to
   247  	// guarantee best possible match between nodes.
   248  	sort.Strings(servers)
   249  	c.numServers.Set(float64(len(servers)))
   250  	return c.serverList.SetServers(servers...)
   251  }