github.com/thanos-io/thanos@v0.32.5/internal/cortex/chunk/cache/memcached_client.go (about) 1 // Copyright (c) The Cortex Authors. 2 // Licensed under the Apache License 2.0. 3 4 package cache 5 6 import ( 7 "context" 8 "flag" 9 "fmt" 10 "net" 11 "sort" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/bradfitz/gomemcache/memcache" 17 "github.com/go-kit/log" 18 "github.com/go-kit/log/level" 19 "github.com/prometheus/client_golang/prometheus" 20 "github.com/prometheus/client_golang/prometheus/promauto" 21 "github.com/sony/gobreaker" 22 "github.com/thanos-io/thanos/pkg/discovery/dns" 23 ) 24 25 // MemcachedClient interface exists for mocking memcacheClient. 26 type MemcachedClient interface { 27 GetMulti(keys []string) (map[string]*memcache.Item, error) 28 Set(item *memcache.Item) error 29 } 30 31 type serverSelector interface { 32 memcache.ServerSelector 33 SetServers(servers ...string) error 34 } 35 36 // memcachedClient is a memcache client that gets its server list from SRV 37 // records, and periodically updates that ServerList. 38 type memcachedClient struct { 39 sync.Mutex 40 name string 41 *memcache.Client 42 serverList serverSelector 43 44 hostname string 45 service string 46 47 addresses []string 48 provider *dns.Provider 49 50 cbs map[ /*address*/ string]*gobreaker.CircuitBreaker 51 cbFailures uint 52 cbTimeout time.Duration 53 cbInterval time.Duration 54 55 maxItemSize int 56 57 quit chan struct{} 58 wait sync.WaitGroup 59 60 numServers prometheus.Gauge 61 skipped prometheus.Counter 62 63 logger log.Logger 64 } 65 66 // MemcachedClientConfig defines how a MemcachedClient should be constructed. 67 type MemcachedClientConfig struct { 68 Host string `yaml:"host"` 69 Service string `yaml:"service"` 70 Addresses string `yaml:"addresses"` // EXPERIMENTAL. 71 Timeout time.Duration `yaml:"timeout"` 72 MaxIdleConns int `yaml:"max_idle_conns"` 73 MaxItemSize int `yaml:"max_item_size"` 74 UpdateInterval time.Duration `yaml:"update_interval"` 75 ConsistentHash bool `yaml:"consistent_hash"` 76 CBFailures uint `yaml:"circuit_breaker_consecutive_failures"` 77 CBTimeout time.Duration `yaml:"circuit_breaker_timeout"` // reset error count after this long 78 CBInterval time.Duration `yaml:"circuit_breaker_interval"` // remain closed for this long after CBFailures errors 79 } 80 81 // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet 82 func (cfg *MemcachedClientConfig) RegisterFlagsWithPrefix(prefix, description string, f *flag.FlagSet) { 83 f.StringVar(&cfg.Host, prefix+"memcached.hostname", "", description+"Hostname for memcached service to use. If empty and if addresses is unset, no memcached will be used.") 84 f.StringVar(&cfg.Service, prefix+"memcached.service", "memcached", description+"SRV service used to discover memcache servers.") 85 f.StringVar(&cfg.Addresses, prefix+"memcached.addresses", "", description+"EXPERIMENTAL: Comma separated addresses list in DNS Service Discovery format: https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery") 86 f.IntVar(&cfg.MaxIdleConns, prefix+"memcached.max-idle-conns", 16, description+"Maximum number of idle connections in pool.") 87 f.DurationVar(&cfg.Timeout, prefix+"memcached.timeout", 100*time.Millisecond, description+"Maximum time to wait before giving up on memcached requests.") 88 f.DurationVar(&cfg.UpdateInterval, prefix+"memcached.update-interval", 1*time.Minute, description+"Period with which to poll DNS for memcache servers.") 89 f.BoolVar(&cfg.ConsistentHash, prefix+"memcached.consistent-hash", true, description+"Use consistent hashing to distribute to memcache servers.") 90 f.UintVar(&cfg.CBFailures, prefix+"memcached.circuit-breaker-consecutive-failures", 10, description+"Trip circuit-breaker after this number of consecutive dial failures (if zero then circuit-breaker is disabled).") 91 f.DurationVar(&cfg.CBTimeout, prefix+"memcached.circuit-breaker-timeout", 10*time.Second, description+"Duration circuit-breaker remains open after tripping (if zero then 60 seconds is used).") 92 f.DurationVar(&cfg.CBInterval, prefix+"memcached.circuit-breaker-interval", 10*time.Second, description+"Reset circuit-breaker counts after this long (if zero then never reset).") 93 f.IntVar(&cfg.MaxItemSize, prefix+"memcached.max-item-size", 0, description+"The maximum size of an item stored in memcached. Bigger items are not stored. If set to 0, no maximum size is enforced.") 94 } 95 96 // NewMemcachedClient creates a new MemcacheClient that gets its server list 97 // from SRV and updates the server list on a regular basis. 98 func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Registerer, logger log.Logger) MemcachedClient { 99 var selector serverSelector 100 if cfg.ConsistentHash { 101 selector = &MemcachedJumpHashSelector{} 102 } else { 103 selector = &memcache.ServerList{} 104 } 105 106 client := memcache.NewFromSelector(selector) 107 client.Timeout = cfg.Timeout 108 client.MaxIdleConns = cfg.MaxIdleConns 109 110 dnsProviderRegisterer := prometheus.WrapRegistererWithPrefix("cortex_", prometheus.WrapRegistererWith(prometheus.Labels{ 111 "name": name, 112 }, r)) 113 114 newClient := &memcachedClient{ 115 name: name, 116 Client: client, 117 serverList: selector, 118 hostname: cfg.Host, 119 service: cfg.Service, 120 logger: logger, 121 provider: dns.NewProvider(logger, dnsProviderRegisterer, dns.GolangResolverType), 122 cbs: make(map[string]*gobreaker.CircuitBreaker), 123 cbFailures: cfg.CBFailures, 124 cbInterval: cfg.CBInterval, 125 cbTimeout: cfg.CBTimeout, 126 maxItemSize: cfg.MaxItemSize, 127 quit: make(chan struct{}), 128 129 numServers: promauto.With(r).NewGauge(prometheus.GaugeOpts{ 130 Namespace: "cortex", 131 Name: "memcache_client_servers", 132 Help: "The number of memcache servers discovered.", 133 ConstLabels: prometheus.Labels{"name": name}, 134 }), 135 136 skipped: promauto.With(r).NewCounter(prometheus.CounterOpts{ 137 Namespace: "cortex", 138 Name: "memcache_client_set_skip_total", 139 Help: "Total number of skipped set operations because of the value is larger than the max-item-size.", 140 ConstLabels: prometheus.Labels{"name": name}, 141 }), 142 } 143 if cfg.CBFailures > 0 { 144 newClient.Client.DialTimeout = newClient.dialViaCircuitBreaker 145 } 146 147 if len(cfg.Addresses) > 0 { 148 newClient.addresses = strings.Split(cfg.Addresses, ",") 149 } 150 151 err := newClient.updateMemcacheServers() 152 if err != nil { 153 level.Error(logger).Log("msg", "error setting memcache servers to host", "host", cfg.Host, "err", err) 154 } 155 156 newClient.wait.Add(1) 157 go newClient.updateLoop(cfg.UpdateInterval) 158 return newClient 159 } 160 161 func (c *memcachedClient) circuitBreakerStateChange(name string, from gobreaker.State, to gobreaker.State) { 162 level.Info(c.logger).Log("msg", "circuit-breaker state change", "name", name, "from-state", from, "to-state", to) 163 } 164 165 func (c *memcachedClient) dialViaCircuitBreaker(network, address string, timeout time.Duration) (net.Conn, error) { 166 c.Lock() 167 cb := c.cbs[address] 168 if cb == nil { 169 cb = gobreaker.NewCircuitBreaker(gobreaker.Settings{ 170 Name: c.name + ":" + address, 171 Interval: c.cbInterval, 172 Timeout: c.cbTimeout, 173 OnStateChange: c.circuitBreakerStateChange, 174 ReadyToTrip: func(counts gobreaker.Counts) bool { 175 return uint(counts.ConsecutiveFailures) > c.cbFailures 176 }, 177 }) 178 c.cbs[address] = cb 179 } 180 c.Unlock() 181 182 conn, err := cb.Execute(func() (interface{}, error) { 183 return net.DialTimeout(network, address, timeout) 184 }) 185 if err != nil { 186 return nil, err 187 } 188 return conn.(net.Conn), nil 189 } 190 191 func (c *memcachedClient) updateLoop(updateInterval time.Duration) { 192 defer c.wait.Done() 193 ticker := time.NewTicker(updateInterval) 194 for { 195 select { 196 case <-ticker.C: 197 err := c.updateMemcacheServers() 198 if err != nil { 199 level.Warn(c.logger).Log("msg", "error updating memcache servers", "err", err) 200 } 201 case <-c.quit: 202 ticker.Stop() 203 return 204 } 205 } 206 } 207 208 // updateMemcacheServers sets a memcache server list from SRV records. SRV 209 // priority & weight are ignored. 210 func (c *memcachedClient) updateMemcacheServers() error { 211 var servers []string 212 213 if len(c.addresses) > 0 { 214 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 215 defer cancel() 216 217 if err := c.provider.Resolve(ctx, c.addresses); err != nil { 218 return err 219 } 220 servers = c.provider.Addresses() 221 } else { 222 _, addrs, err := net.LookupSRV(c.service, "tcp", c.hostname) 223 if err != nil { 224 return err 225 } 226 for _, srv := range addrs { 227 servers = append(servers, fmt.Sprintf("%s:%d", srv.Target, srv.Port)) 228 } 229 } 230 231 if len(servers) > 0 { 232 // Copy across circuit-breakers for current set of addresses, thus 233 // leaving behind any for servers we won't talk to again 234 c.Lock() 235 newCBs := make(map[string]*gobreaker.CircuitBreaker, len(servers)) 236 for _, address := range servers { 237 if cb, exists := c.cbs[address]; exists { 238 newCBs[address] = cb 239 } 240 } 241 c.cbs = newCBs 242 c.Unlock() 243 } 244 245 // ServerList deterministically maps keys to _index_ of the server list. 246 // Since DNS returns records in different order each time, we sort to 247 // guarantee best possible match between nodes. 248 sort.Strings(servers) 249 c.numServers.Set(float64(len(servers))) 250 return c.serverList.SetServers(servers...) 251 }