github.com/thanos-io/thanos@v0.32.5/pkg/cacheutil/memcached_client.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package cacheutil
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/bradfitz/gomemcache/memcache"
    15  	"github.com/go-kit/log"
    16  	"github.com/go-kit/log/level"
    17  	"github.com/pkg/errors"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"github.com/prometheus/client_golang/prometheus/promauto"
    20  	"gopkg.in/yaml.v2"
    21  
    22  	"github.com/thanos-io/thanos/pkg/discovery/dns"
    23  	memcacheDiscovery "github.com/thanos-io/thanos/pkg/discovery/memcache"
    24  	"github.com/thanos-io/thanos/pkg/extprom"
    25  	"github.com/thanos-io/thanos/pkg/gate"
    26  	"github.com/thanos-io/thanos/pkg/model"
    27  )
    28  
const (
	// Values used for the "operation" label on the client metrics.
	opSet                 = "set"
	opSetMulti            = "setmulti"
	opGetMulti            = "getmulti"
	// Values used for the "reason" label on the failure/skipped metrics.
	reasonMaxItemSize     = "max-item-size"
	reasonAsyncBufferFull = "async-buffer-full"
	reasonMalformedKey    = "malformed-key"
	reasonTimeout         = "timeout"
	reasonServerError     = "server-error"
	reasonNetworkError    = "network-error"
	reasonOther           = "other"
)
    41  
var (
	// Sentinel errors returned by the client and its config validation.
	errMemcachedAsyncBufferFull                = errors.New("the async buffer is full")
	errMemcachedConfigNoAddrs                  = errors.New("no memcached addresses provided")
	errMemcachedDNSUpdateIntervalNotPositive   = errors.New("DNS provider update interval must be positive")
	errMemcachedMaxAsyncConcurrencyNotPositive = errors.New("max async concurrency must be positive")

	// defaultMemcachedClientConfig holds the defaults applied before YAML
	// unmarshalling in parseMemcachedClientConfig.
	defaultMemcachedClientConfig = MemcachedClientConfig{
		Timeout:                   500 * time.Millisecond,
		MaxIdleConnections:        100,
		MaxAsyncConcurrency:       20,
		MaxAsyncBufferSize:        10000,
		MaxItemSize:               model.Bytes(1024 * 1024),
		MaxGetMultiConcurrency:    100,
		MaxGetMultiBatchSize:      0,
		DNSProviderUpdateInterval: 10 * time.Second,
		AutoDiscovery:             false,
	}
)

// Compile-time checks that both concrete clients satisfy RemoteCacheClient.
var (
	_ RemoteCacheClient = (*memcachedClient)(nil)
	_ RemoteCacheClient = (*RedisClient)(nil)
)
    65  
// RemoteCacheClient is a high level client to interact with remote cache.
type RemoteCacheClient interface {
	// GetMulti fetches multiple keys at once from remoteCache. In case of error,
	// an empty map is returned and the error tracked/logged.
	GetMulti(ctx context.Context, keys []string) map[string][]byte

	// SetAsync enqueues an asynchronous operation to store a key into memcached.
	// Returns an error in case it fails to enqueue the operation. In case the
	// underlying async operation will fail, the error will be tracked/logged.
	SetAsync(key string, value []byte, ttl time.Duration) error

	// Stop client and release underlying resources.
	Stop()
}

// MemcachedClient is an alias kept for backward compatibility with older
// callers that referenced the interface under its previous name.
type MemcachedClient = RemoteCacheClient
    83  
// memcachedClientBackend is an interface used to mock the underlying client in tests.
// *memcache.Client satisfies it.
type memcachedClientBackend interface {
	GetMulti(keys []string) (map[string]*memcache.Item, error)
	Set(item *memcache.Item) error
}

// updatableServerSelector extends the interface used for picking a memcached server
// for a key to allow servers to be updated at runtime. It allows the selector used
// by the client to be mocked in tests.
type updatableServerSelector interface {
	memcache.ServerSelector

	// SetServers changes a ServerSelector's set of servers at runtime
	// and is safe for concurrent use by multiple goroutines.
	//
	// SetServers returns an error if any of the server names fail to
	// resolve. No attempt is made to connect to the server. If any
	// error occurs, no changes are made to the internal server list.
	SetServers(servers ...string) error
}
   104  
// MemcachedClientConfig is the config accepted by RemoteCacheClient.
type MemcachedClientConfig struct {
	// Addresses specifies the list of memcached addresses. The addresses get
	// resolved with the DNS provider.
	Addresses []string `yaml:"addresses"`

	// Timeout specifies the socket read/write timeout.
	Timeout time.Duration `yaml:"timeout"`

	// MaxIdleConnections specifies the maximum number of idle connections that
	// will be maintained per address. For better performance, this should be
	// set to a number higher than your peak parallel requests.
	MaxIdleConnections int `yaml:"max_idle_connections"`

	// MaxAsyncConcurrency specifies the maximum number of SetAsync goroutines.
	MaxAsyncConcurrency int `yaml:"max_async_concurrency"`

	// MaxAsyncBufferSize specifies the queue buffer size for SetAsync operations.
	MaxAsyncBufferSize int `yaml:"max_async_buffer_size"`

	// MaxGetMultiConcurrency specifies the maximum number of concurrent GetMulti() operations.
	// If set to 0, concurrency is unlimited.
	MaxGetMultiConcurrency int `yaml:"max_get_multi_concurrency"`

	// MaxItemSize specifies the maximum size of an item stored in memcached.
	// Items bigger than MaxItemSize are skipped.
	// If set to 0, no maximum size is enforced.
	MaxItemSize model.Bytes `yaml:"max_item_size"`

	// MaxGetMultiBatchSize specifies the maximum number of keys a single underlying
	// GetMulti() should run. If more keys are specified, internally keys are split
	// into multiple batches and fetched concurrently, honoring MaxGetMultiConcurrency parallelism.
	// If set to 0, the max batch size is unlimited.
	MaxGetMultiBatchSize int `yaml:"max_get_multi_batch_size"`

	// DNSProviderUpdateInterval specifies the DNS discovery update interval.
	DNSProviderUpdateInterval time.Duration `yaml:"dns_provider_update_interval"`

	// AutoDiscovery configures memcached client to perform auto-discovery instead of DNS resolution.
	AutoDiscovery bool `yaml:"auto_discovery"`
}
   146  
   147  func (c *MemcachedClientConfig) validate() error {
   148  	if len(c.Addresses) == 0 {
   149  		return errMemcachedConfigNoAddrs
   150  	}
   151  
   152  	// Avoid panic in time ticker.
   153  	if c.DNSProviderUpdateInterval <= 0 {
   154  		return errMemcachedDNSUpdateIntervalNotPositive
   155  	}
   156  
   157  	// Set async only available when MaxAsyncConcurrency > 0.
   158  	if c.MaxAsyncConcurrency <= 0 {
   159  		return errMemcachedMaxAsyncConcurrencyNotPositive
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  // parseMemcachedClientConfig unmarshals a buffer into a MemcachedClientConfig with default values.
   166  func parseMemcachedClientConfig(conf []byte) (MemcachedClientConfig, error) {
   167  	config := defaultMemcachedClientConfig
   168  	if err := yaml.Unmarshal(conf, &config); err != nil {
   169  		return MemcachedClientConfig{}, err
   170  	}
   171  
   172  	return config, nil
   173  }
   174  
// memcachedClient is the memcached implementation of RemoteCacheClient.
type memcachedClient struct {
	logger   log.Logger            // logger annotated with the client name
	config   MemcachedClientConfig // immutable after construction
	client   memcachedClientBackend
	selector updatableServerSelector // updated by resolveAddrs at runtime

	// Name provides an identifier for the instantiated Client
	name string

	// Address provider used to keep the memcached servers list updated.
	addressProvider AddressProvider

	// Gate used to enforce the max number of concurrent GetMulti() operations.
	getMultiGate gate.Gate

	// Tracked metrics.
	clientInfo prometheus.GaugeFunc
	operations *prometheus.CounterVec
	failures   *prometheus.CounterVec
	skipped    *prometheus.CounterVec
	duration   *prometheus.HistogramVec
	dataSize   *prometheus.HistogramVec

	// p runs SetAsync operations and owns the stop channel / wait group also
	// used by the resolveAddrs background loop.
	p *asyncOperationProcessor
}
   200  
// AddressProvider performs node address resolution given a list of clusters.
type AddressProvider interface {
	// Resolve resolves the provided list of memcached cluster to the actual nodes.
	Resolve(context.Context, []string) error

	// Addresses returns the nodes.
	Addresses() []string
}

// memcachedGetMultiResult carries the outcome of one GetMulti batch across
// the results channel in getMultiBatched.
type memcachedGetMultiResult struct {
	items map[string]*memcache.Item
	err   error
}
   214  
   215  // NewMemcachedClient makes a new RemoteCacheClient.
   216  func NewMemcachedClient(logger log.Logger, name string, conf []byte, reg prometheus.Registerer) (*memcachedClient, error) {
   217  	config, err := parseMemcachedClientConfig(conf)
   218  	if err != nil {
   219  		return nil, err
   220  	}
   221  
   222  	return NewMemcachedClientWithConfig(logger, name, config, reg)
   223  }
   224  
   225  // NewMemcachedClientWithConfig makes a new RemoteCacheClient.
   226  func NewMemcachedClientWithConfig(logger log.Logger, name string, config MemcachedClientConfig, reg prometheus.Registerer) (*memcachedClient, error) {
   227  	if err := config.validate(); err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	// We use a custom servers selector in order to use a jump hash
   232  	// for servers selection.
   233  	selector := &MemcachedJumpHashSelector{}
   234  
   235  	client := memcache.NewFromSelector(selector)
   236  	client.Timeout = config.Timeout
   237  	client.MaxIdleConns = config.MaxIdleConnections
   238  
   239  	if reg != nil {
   240  		reg = prometheus.WrapRegistererWith(prometheus.Labels{"name": name}, reg)
   241  	}
   242  	return newMemcachedClient(logger, client, selector, config, reg, name)
   243  }
   244  
   245  func newMemcachedClient(
   246  	logger log.Logger,
   247  	client memcachedClientBackend,
   248  	selector updatableServerSelector,
   249  	config MemcachedClientConfig,
   250  	reg prometheus.Registerer,
   251  	name string,
   252  ) (*memcachedClient, error) {
   253  	promRegisterer := extprom.WrapRegistererWithPrefix("thanos_memcached_", reg)
   254  
   255  	var addressProvider AddressProvider
   256  	if config.AutoDiscovery {
   257  		addressProvider = memcacheDiscovery.NewProvider(
   258  			logger,
   259  			promRegisterer,
   260  			config.Timeout,
   261  		)
   262  	} else {
   263  		addressProvider = dns.NewProvider(
   264  			logger,
   265  			extprom.WrapRegistererWithPrefix("thanos_memcached_", reg),
   266  			dns.MiekgdnsResolverType,
   267  		)
   268  	}
   269  
   270  	c := &memcachedClient{
   271  		logger:          log.With(logger, "name", name),
   272  		config:          config,
   273  		client:          client,
   274  		selector:        selector,
   275  		addressProvider: addressProvider,
   276  		getMultiGate: gate.New(
   277  			extprom.WrapRegistererWithPrefix("thanos_memcached_getmulti_", reg),
   278  			config.MaxGetMultiConcurrency,
   279  			gate.Gets,
   280  		),
   281  		p: newAsyncOperationProcessor(config.MaxAsyncBufferSize, config.MaxAsyncConcurrency),
   282  	}
   283  
   284  	c.clientInfo = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
   285  		Name: "thanos_memcached_client_info",
   286  		Help: "A metric with a constant '1' value labeled by configuration options from which memcached client was configured.",
   287  		ConstLabels: prometheus.Labels{
   288  			"timeout":                      config.Timeout.String(),
   289  			"max_idle_connections":         strconv.Itoa(config.MaxIdleConnections),
   290  			"max_async_concurrency":        strconv.Itoa(config.MaxAsyncConcurrency),
   291  			"max_async_buffer_size":        strconv.Itoa(config.MaxAsyncBufferSize),
   292  			"max_item_size":                strconv.FormatUint(uint64(config.MaxItemSize), 10),
   293  			"max_get_multi_concurrency":    strconv.Itoa(config.MaxGetMultiConcurrency),
   294  			"max_get_multi_batch_size":     strconv.Itoa(config.MaxGetMultiBatchSize),
   295  			"dns_provider_update_interval": config.DNSProviderUpdateInterval.String(),
   296  		},
   297  	},
   298  		func() float64 { return 1 },
   299  	)
   300  
   301  	c.operations = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
   302  		Name: "thanos_memcached_operations_total",
   303  		Help: "Total number of operations against memcached.",
   304  	}, []string{"operation"})
   305  	c.operations.WithLabelValues(opGetMulti)
   306  	c.operations.WithLabelValues(opSet)
   307  
   308  	c.failures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
   309  		Name: "thanos_memcached_operation_failures_total",
   310  		Help: "Total number of operations against memcached that failed.",
   311  	}, []string{"operation", "reason"})
   312  	c.failures.WithLabelValues(opGetMulti, reasonTimeout)
   313  	c.failures.WithLabelValues(opGetMulti, reasonMalformedKey)
   314  	c.failures.WithLabelValues(opGetMulti, reasonServerError)
   315  	c.failures.WithLabelValues(opGetMulti, reasonNetworkError)
   316  	c.failures.WithLabelValues(opGetMulti, reasonOther)
   317  	c.failures.WithLabelValues(opSet, reasonTimeout)
   318  	c.failures.WithLabelValues(opSet, reasonMalformedKey)
   319  	c.failures.WithLabelValues(opSet, reasonServerError)
   320  	c.failures.WithLabelValues(opSet, reasonNetworkError)
   321  	c.failures.WithLabelValues(opSet, reasonOther)
   322  
   323  	c.skipped = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
   324  		Name: "thanos_memcached_operation_skipped_total",
   325  		Help: "Total number of operations against memcached that have been skipped.",
   326  	}, []string{"operation", "reason"})
   327  	c.skipped.WithLabelValues(opGetMulti, reasonMaxItemSize)
   328  	c.skipped.WithLabelValues(opSet, reasonMaxItemSize)
   329  	c.skipped.WithLabelValues(opSet, reasonAsyncBufferFull)
   330  
   331  	c.duration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
   332  		Name:    "thanos_memcached_operation_duration_seconds",
   333  		Help:    "Duration of operations against memcached.",
   334  		Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 3, 6, 10},
   335  	}, []string{"operation"})
   336  	c.duration.WithLabelValues(opGetMulti)
   337  	c.duration.WithLabelValues(opSet)
   338  
   339  	c.dataSize = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
   340  		Name: "thanos_memcached_operation_data_size_bytes",
   341  		Help: "Tracks the size of the data stored in and fetched from memcached.",
   342  		Buckets: []float64{
   343  			32, 256, 512, 1024, 32 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 32 * 1024 * 1024, 256 * 1024 * 1024, 512 * 1024 * 1024,
   344  		},
   345  	},
   346  		[]string{"operation"},
   347  	)
   348  	c.dataSize.WithLabelValues(opGetMulti)
   349  	c.dataSize.WithLabelValues(opSet)
   350  
   351  	// As soon as the client is created it must ensure that memcached server
   352  	// addresses are resolved, so we're going to trigger an initial addresses
   353  	// resolution here.
   354  	if err := c.resolveAddrs(); err != nil {
   355  		return nil, err
   356  	}
   357  
   358  	c.p.workers.Add(1)
   359  	go c.resolveAddrsLoop()
   360  
   361  	return c, nil
   362  }
   363  
// Stop releases the client's background resources by stopping the async
// operation processor; the processor's stop channel also terminates the
// address-resolution loop.
func (c *memcachedClient) Stop() {
	c.p.Stop()
}
   367  
   368  func (c *memcachedClient) SetAsync(key string, value []byte, ttl time.Duration) error {
   369  	// Skip hitting memcached at all if the item is bigger than the max allowed size.
   370  	if c.config.MaxItemSize > 0 && uint64(len(value)) > uint64(c.config.MaxItemSize) {
   371  		c.skipped.WithLabelValues(opSet, reasonMaxItemSize).Inc()
   372  		return nil
   373  	}
   374  
   375  	err := c.p.enqueueAsync(func() {
   376  		start := time.Now()
   377  		c.operations.WithLabelValues(opSet).Inc()
   378  
   379  		err := c.client.Set(&memcache.Item{
   380  			Key:        key,
   381  			Value:      value,
   382  			Expiration: int32(time.Now().Add(ttl).Unix()),
   383  		})
   384  		if err != nil {
   385  			// If the PickServer will fail for any reason the server address will be nil
   386  			// and so missing in the logs. We're OK with that (it's a best effort).
   387  			serverAddr, _ := c.selector.PickServer(key)
   388  			level.Debug(c.logger).Log(
   389  				"msg", "failed to store item to memcached",
   390  				"key", key,
   391  				"sizeBytes", len(value),
   392  				"server", serverAddr,
   393  				"err", err,
   394  			)
   395  			c.trackError(opSet, err)
   396  			return
   397  		}
   398  
   399  		c.dataSize.WithLabelValues(opSet).Observe(float64(len(value)))
   400  		c.duration.WithLabelValues(opSet).Observe(time.Since(start).Seconds())
   401  	})
   402  
   403  	if err == errMemcachedAsyncBufferFull {
   404  		c.skipped.WithLabelValues(opSet, reasonAsyncBufferFull).Inc()
   405  		level.Debug(c.logger).Log("msg", "failed to store item to memcached because the async buffer is full", "err", err, "size", len(c.p.asyncQueue))
   406  		return nil
   407  	}
   408  	return err
   409  }
   410  
   411  func (c *memcachedClient) GetMulti(ctx context.Context, keys []string) map[string][]byte {
   412  	if len(keys) == 0 {
   413  		return nil
   414  	}
   415  
   416  	batches, err := c.getMultiBatched(ctx, keys)
   417  	if err != nil {
   418  		level.Warn(c.logger).Log("msg", "failed to fetch items from memcached", "numKeys", len(keys), "firstKey", keys[0], "err", err)
   419  
   420  		// In case we have both results and an error, it means some batch requests
   421  		// failed and other succeeded. In this case we prefer to log it and move on,
   422  		// given returning some results from the cache is better than returning
   423  		// nothing.
   424  		if len(batches) == 0 {
   425  			return nil
   426  		}
   427  	}
   428  
   429  	hits := map[string][]byte{}
   430  	for _, items := range batches {
   431  		for key, item := range items {
   432  			hits[key] = item.Value
   433  		}
   434  	}
   435  
   436  	return hits
   437  }
   438  
// getMultiBatched splits keys into batches of at most MaxGetMultiBatchSize,
// fetches them concurrently (honoring MaxGetMultiConcurrency) and returns one
// item map per successful batch. On partial failure it returns the successful
// batches together with the last error encountered, so the caller can decide
// whether partial results are acceptable.
func (c *memcachedClient) getMultiBatched(ctx context.Context, keys []string) ([]map[string]*memcache.Item, error) {
	// Do not batch if the input keys are less than the max batch size.
	if (c.config.MaxGetMultiBatchSize <= 0) || (len(keys) <= c.config.MaxGetMultiBatchSize) {
		// Even if we're not splitting the input into batches, make sure that our single request
		// still counts against the concurrency limit.
		if c.config.MaxGetMultiConcurrency > 0 {
			if err := c.getMultiGate.Start(ctx); err != nil {
				return nil, errors.Wrapf(err, "failed to wait for turn. Instance: %s", c.name)
			}

			defer c.getMultiGate.Done()
		}

		items, err := c.getMultiSingle(ctx, keys)
		if err != nil {
			return nil, err
		}

		return []map[string]*memcache.Item{items}, nil
	}

	// Calculate the number of expected results (one per batch, rounding up).
	batchSize := c.config.MaxGetMultiBatchSize
	numResults := len(keys) / batchSize
	if len(keys)%batchSize != 0 {
		numResults++
	}

	// If max concurrency is disabled, use a nil gate for the doWithBatch method which will
	// not apply any limit to the number goroutines started to make batch requests in that case.
	var getMultiGate gate.Gate
	if c.config.MaxGetMultiConcurrency > 0 {
		getMultiGate = c.getMultiGate
	}

	// Sort keys based on which memcached server they will be sharded to. Sorting keys that
	// are on the same server together before splitting into batches reduces the number of
	// connections required and increases the number of "gets" per connection.
	sortedKeys := c.sortKeysByServer(keys)

	// Allocate a channel to store results for each batch request. The max concurrency will be
	// enforced by doWithBatch. The buffer is sized so producers never block.
	results := make(chan *memcachedGetMultiResult, numResults)
	defer close(results)

	// Ignore the error here since it can only be returned by our provided function which
	// always returns nil. NOTE also we are using a background context here for the doWithBatch
	// method. This is to ensure that it runs the expected number of batches _even if_ our
	// context (`ctx`) is canceled since we expect a certain number of batches to be read
	// from `results` below. The wrapped `getMultiSingle` method will still check our context
	// and short-circuit if it has been canceled.
	_ = doWithBatch(context.Background(), len(keys), c.config.MaxGetMultiBatchSize, getMultiGate, func(startIndex, endIndex int) error {
		batchKeys := sortedKeys[startIndex:endIndex]

		res := &memcachedGetMultiResult{}
		res.items, res.err = c.getMultiSingle(ctx, batchKeys)

		results <- res
		return nil
	})

	// Wait for all batch results. In case of error, we keep
	// track of the last error occurred.
	items := make([]map[string]*memcache.Item, 0, numResults)
	var lastErr error

	for i := 0; i < numResults; i++ {
		result := <-results
		if result.err != nil {
			lastErr = result.err
			continue
		}

		items = append(items, result.items)
	}

	return items, lastErr
}
   517  
   518  func (c *memcachedClient) getMultiSingle(ctx context.Context, keys []string) (items map[string]*memcache.Item, err error) {
   519  	start := time.Now()
   520  	c.operations.WithLabelValues(opGetMulti).Inc()
   521  
   522  	select {
   523  	case <-ctx.Done():
   524  		// Make sure our context hasn't been canceled before fetching cache items using
   525  		// cache client backend.
   526  		return nil, ctx.Err()
   527  	default:
   528  		items, err = c.client.GetMulti(keys)
   529  	}
   530  
   531  	if err != nil {
   532  		level.Debug(c.logger).Log("msg", "failed to get multiple items from memcached", "err", err)
   533  		c.trackError(opGetMulti, err)
   534  	} else {
   535  		var total int
   536  		for _, it := range items {
   537  			total += len(it.Value)
   538  		}
   539  		c.dataSize.WithLabelValues(opGetMulti).Observe(float64(total))
   540  		c.duration.WithLabelValues(opGetMulti).Observe(time.Since(start).Seconds())
   541  	}
   542  
   543  	return items, err
   544  }
   545  
   546  // sortKeysByServer sorts cache keys within a slice based on which server they are
   547  // sharded to using a memcache.ServerSelector instance. The keys are ordered so keys
   548  // on the same server are next to each other. Any errors encountered determining which
   549  // server a key should be on will result in returning keys unsorted (in the same order
   550  // they were supplied in). Note that output is not guaranteed to be any particular order
   551  // *except* that keys sharded to the same server will be together. The order of keys
   552  // returned may change from call to call.
   553  func (c *memcachedClient) sortKeysByServer(keys []string) []string {
   554  	bucketed := make(map[string][]string)
   555  
   556  	for _, key := range keys {
   557  		addr, err := c.selector.PickServer(key)
   558  		// If we couldn't determine the correct server, return keys in existing order
   559  		if err != nil {
   560  			return keys
   561  		}
   562  
   563  		addrString := addr.String()
   564  		bucketed[addrString] = append(bucketed[addrString], key)
   565  	}
   566  
   567  	var out []string
   568  	for srv := range bucketed {
   569  		out = append(out, bucketed[srv]...)
   570  	}
   571  
   572  	return out
   573  }
   574  
   575  func (c *memcachedClient) trackError(op string, err error) {
   576  	var connErr *memcache.ConnectTimeoutError
   577  	var netErr net.Error
   578  	switch {
   579  	case errors.As(err, &connErr):
   580  		c.failures.WithLabelValues(op, reasonTimeout).Inc()
   581  	case errors.As(err, &netErr):
   582  		if netErr.Timeout() {
   583  			c.failures.WithLabelValues(op, reasonTimeout).Inc()
   584  		} else {
   585  			c.failures.WithLabelValues(op, reasonNetworkError).Inc()
   586  		}
   587  	case errors.Is(err, memcache.ErrMalformedKey):
   588  		c.failures.WithLabelValues(op, reasonMalformedKey).Inc()
   589  	case errors.Is(err, memcache.ErrServerError):
   590  		c.failures.WithLabelValues(op, reasonServerError).Inc()
   591  	default:
   592  		c.failures.WithLabelValues(op, reasonOther).Inc()
   593  	}
   594  }
   595  
   596  func (c *memcachedClient) resolveAddrsLoop() {
   597  	defer c.p.workers.Done()
   598  
   599  	ticker := time.NewTicker(c.config.DNSProviderUpdateInterval)
   600  	defer ticker.Stop()
   601  
   602  	for {
   603  		select {
   604  		case <-ticker.C:
   605  			err := c.resolveAddrs()
   606  			if err != nil {
   607  				level.Warn(c.logger).Log("msg", "failed update memcached servers list", "err", err)
   608  			}
   609  		case <-c.p.stop:
   610  			return
   611  		}
   612  	}
   613  }
   614  
   615  func (c *memcachedClient) resolveAddrs() error {
   616  	// Resolve configured addresses with a reasonable timeout.
   617  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   618  	defer cancel()
   619  
   620  	// If some of the dns resolution fails, log the error.
   621  	if err := c.addressProvider.Resolve(ctx, c.config.Addresses); err != nil {
   622  		level.Error(c.logger).Log("msg", "failed to resolve addresses for memcached", "addresses", strings.Join(c.config.Addresses, ","), "err", err)
   623  	}
   624  	// Fail in case no server address is resolved.
   625  	servers := c.addressProvider.Addresses()
   626  	if len(servers) == 0 {
   627  		return fmt.Errorf("no server address resolved for %s", c.name)
   628  	}
   629  
   630  	return c.selector.SetServers(servers...)
   631  }