github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cluster/client/etcd/client.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 

package etcd

import (
	"crypto/rand"
	"errors"
	"fmt"
	"math/big"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/m3db/m3/src/cluster/client"
	"github.com/m3db/m3/src/cluster/kv"
	etcdkv "github.com/m3db/m3/src/cluster/kv/etcd"
	"github.com/m3db/m3/src/cluster/services"
	etcdheartbeat "github.com/m3db/m3/src/cluster/services/heartbeat/etcd"
	"github.com/m3db/m3/src/cluster/services/leader"
	"github.com/m3db/m3/src/x/instrument"
	"github.com/m3db/m3/src/x/retry"

	"github.com/uber-go/tally"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.uber.org/zap"
)

const (
	// hierarchySeparator joins the parts of a kv store cache key and is the
	// separator recognised when validating top-level namespaces.
	hierarchySeparator = "/"
	// internalPrefix marks namespaces that callers may not use as a
	// top-level namespace.
	internalPrefix = "_"
	// cacheFileSeparator joins the fields of a cache file name and replaces
	// any OS path separator found inside those fields.
	cacheFileSeparator = "_"
	// cacheFileSuffix is appended to every generated cache file name.
	cacheFileSuffix = ".json"
	// kvPrefix is the namespace used when the caller does not supply one.
	// TODO deprecate this once all keys are migrated to per service namespace
	kvPrefix = "_kv"

	// Set GRPC send and receive message limits to 32 MiB, which should be
	// sufficient for most use cases.
	// The default 2 MiB limit usually comes as an unpleasant surprise - etcd itself will reject
	// requests that are too large anyway, and there are many other ways to tank etcd,
	// like creating too many watchers.
	_grpcMaxSendRecvBufferSize = 32 * 1024 * 1024
)

// errInvalidNamespace is returned when a caller-supplied namespace is empty,
// is just the hierarchy separator, or starts with the internal prefix.
var errInvalidNamespace = errors.New("invalid namespace")

// Compile-time check that *csclient implements this package's Client
// interface, and that Client in turn satisfies client.Client.
var _ client.Client = Client((*csclient)(nil))

// newClientFn builds an etcd client for the given cluster.
type newClientFn func(cluster Cluster) (*clientv3.Client, error)

// cacheFileForZoneFn returns the cache file function to use for a zone.
type cacheFileForZoneFn func(zone string) etcdkv.CacheFileFn

// ZoneClient is a cached etcd client for a zone.
type ZoneClient struct {
	// Client is the etcd client cached for Zone.
	Client *clientv3.Client
	// Zone is the zone the client was created for.
	Zone string
}

// NewEtcdConfigServiceClient returns a new etcd-backed cluster client.
81 //nolint:golint 82 func NewEtcdConfigServiceClient(opts Options) (*csclient, error) { 83 if err := opts.Validate(); err != nil { 84 return nil, err 85 } 86 87 scope := opts.InstrumentOptions(). 88 MetricsScope(). 89 Tagged(map[string]string{"service": opts.Service()}) 90 91 return &csclient{ 92 opts: opts, 93 sdOpts: opts.ServicesOptions(), 94 kvScope: scope.Tagged(map[string]string{"config_service": "kv"}), 95 sdScope: scope.Tagged(map[string]string{"config_service": "sd"}), 96 hbScope: scope.Tagged(map[string]string{"config_service": "hb"}), 97 clis: make(map[string]*clientv3.Client), 98 logger: opts.InstrumentOptions().Logger(), 99 newFn: newClient, 100 retrier: retry.NewRetrier(opts.RetryOptions()), 101 stores: make(map[string]kv.TxnStore), 102 }, nil 103 } 104 105 // NewConfigServiceClient returns a ConfigServiceClient. 106 func NewConfigServiceClient(opts Options) (client.Client, error) { 107 return NewEtcdConfigServiceClient(opts) 108 } 109 110 type csclient struct { 111 sync.RWMutex 112 clis map[string]*clientv3.Client 113 114 opts Options 115 sdOpts services.Options 116 kvScope tally.Scope 117 sdScope tally.Scope 118 hbScope tally.Scope 119 logger *zap.Logger 120 newFn newClientFn 121 retrier retry.Retrier 122 123 storeLock sync.Mutex 124 stores map[string]kv.TxnStore 125 } 126 127 func (c *csclient) Services(opts services.OverrideOptions) (services.Services, error) { 128 if opts == nil { 129 opts = services.NewOverrideOptions() 130 } 131 return c.createServices(opts) 132 } 133 134 func (c *csclient) KV() (kv.Store, error) { 135 return c.Txn() 136 } 137 138 func (c *csclient) Txn() (kv.TxnStore, error) { 139 return c.TxnStore(kv.NewOverrideOptions()) 140 } 141 142 func (c *csclient) Store(opts kv.OverrideOptions) (kv.Store, error) { 143 return c.TxnStore(opts) 144 } 145 146 func (c *csclient) TxnStore(opts kv.OverrideOptions) (kv.TxnStore, error) { 147 opts, err := c.sanitizeOptions(opts) 148 if err != nil { 149 return nil, err 150 } 151 152 return 
c.createTxnStore(opts) 153 } 154 155 func (c *csclient) createServices(opts services.OverrideOptions) (services.Services, error) { 156 nOpts := opts.NamespaceOptions() 157 cacheFileExtraFields := []string{nOpts.PlacementNamespace(), nOpts.MetadataNamespace()} 158 return services.NewServices(c.sdOpts. 159 SetHeartbeatGen(c.heartbeatGen()). 160 SetKVGen(c.kvGen(c.cacheFileFn(cacheFileExtraFields...))). 161 SetLeaderGen(c.leaderGen()). 162 SetNamespaceOptions(nOpts). 163 SetInstrumentsOptions(instrument.NewOptions(). 164 SetLogger(c.logger). 165 SetMetricsScope(c.sdScope), 166 ), 167 ) 168 } 169 170 func (c *csclient) createTxnStore(opts kv.OverrideOptions) (kv.TxnStore, error) { 171 // validate the override options because they are user supplied. 172 if err := opts.Validate(); err != nil { 173 return nil, err 174 } 175 return c.txnGen(opts, c.cacheFileFn()) 176 } 177 178 func (c *csclient) kvGen(fn cacheFileForZoneFn) services.KVGen { 179 return services.KVGen(func(zone string) (kv.Store, error) { 180 // we don't validate or sanitize the options here because we're using 181 // them as a container for zone. 182 opts := kv.NewOverrideOptions().SetZone(zone) 183 return c.txnGen(opts, fn) 184 }) 185 } 186 187 func (c *csclient) newkvOptions( 188 opts kv.OverrideOptions, 189 cacheFileFn cacheFileForZoneFn, 190 ) etcdkv.Options { 191 kvOpts := etcdkv.NewOptions(). 192 SetInstrumentsOptions(c.opts.InstrumentOptions(). 193 SetLogger(c.logger). 194 SetMetricsScope(c.kvScope)). 195 SetCacheFileFn(cacheFileFn(opts.Zone())). 196 SetWatchWithRevision(c.opts.WatchWithRevision()). 197 SetNewDirectoryMode(c.opts.NewDirectoryMode()). 198 SetEnableFastGets(c.opts.EnableFastGets()). 199 SetRetryOptions(c.opts.RetryOptions()). 200 SetRequestTimeout(c.opts.RequestTimeout()). 201 SetWatchChanInitTimeout(c.opts.WatchChanInitTimeout()). 202 SetWatchChanCheckInterval(c.opts.WatchChanCheckInterval()). 
203 SetWatchChanResetInterval(c.opts.WatchChanResetInterval()) 204 205 if ns := opts.Namespace(); ns != "" { 206 kvOpts = kvOpts.SetPrefix(kvOpts.ApplyPrefix(ns)) 207 } 208 209 if env := opts.Environment(); env != "" { 210 kvOpts = kvOpts.SetPrefix(kvOpts.ApplyPrefix(env)) 211 } 212 213 return kvOpts 214 } 215 216 // txnGen assumes the caller has validated the options passed if they are 217 // user-supplied (as opposed to constructed ourselves). 218 func (c *csclient) txnGen( 219 opts kv.OverrideOptions, 220 cacheFileFn cacheFileForZoneFn, 221 ) (kv.TxnStore, error) { 222 cli, err := c.etcdClientGen(opts.Zone()) 223 if err != nil { 224 return nil, err 225 } 226 227 c.storeLock.Lock() 228 defer c.storeLock.Unlock() 229 230 key := kvStoreCacheKey(opts.Zone(), opts.Namespace(), opts.Environment()) 231 store, ok := c.stores[key] 232 if ok { 233 return store, nil 234 } 235 if store, err = etcdkv.NewStore(cli, c.newkvOptions(opts, cacheFileFn)); err != nil { 236 return nil, err 237 } 238 239 c.stores[key] = store 240 return store, nil 241 } 242 243 func (c *csclient) heartbeatGen() services.HeartbeatGen { 244 return services.HeartbeatGen( 245 func(sid services.ServiceID) (services.HeartbeatService, error) { 246 cli, err := c.etcdClientGen(sid.Zone()) 247 if err != nil { 248 return nil, err 249 } 250 251 opts := etcdheartbeat.NewOptions(). 252 SetInstrumentsOptions(instrument.NewOptions(). 253 SetLogger(c.logger). 254 SetMetricsScope(c.hbScope)). 255 SetServiceID(sid) 256 return etcdheartbeat.NewStore(cli, opts) 257 }, 258 ) 259 } 260 261 func (c *csclient) leaderGen() services.LeaderGen { 262 return services.LeaderGen( 263 func(sid services.ServiceID, eo services.ElectionOptions) (services.LeaderService, error) { 264 cli, err := c.etcdClientGen(sid.Zone()) 265 if err != nil { 266 return nil, err 267 } 268 269 opts := leader.NewOptions(). 270 SetServiceID(sid). 
271 SetElectionOpts(eo) 272 273 return leader.NewService(cli, opts) 274 }, 275 ) 276 } 277 278 func (c *csclient) etcdClientGen(zone string) (*clientv3.Client, error) { 279 c.Lock() 280 defer c.Unlock() 281 282 cli, ok := c.clis[zone] 283 if ok { 284 return cli, nil 285 } 286 287 cluster, ok := c.opts.ClusterForZone(zone) 288 if !ok { 289 return nil, fmt.Errorf("no etcd cluster found for zone: %s", zone) 290 } 291 292 err := c.retrier.Attempt(func() error { 293 var tryErr error 294 cli, tryErr = c.newFn(cluster) 295 return tryErr 296 }) 297 if err != nil { 298 return nil, err 299 } 300 301 c.clis[zone] = cli 302 return cli, nil 303 } 304 305 // Clients returns all currently cached etcd clients. 306 func (c *csclient) Clients() []ZoneClient { 307 c.Lock() 308 defer c.Unlock() 309 310 var ( 311 zones = make([]string, 0, len(c.clis)) 312 clients = make([]ZoneClient, 0, len(c.clis)) 313 ) 314 315 for k := range c.clis { 316 zones = append(zones, k) 317 } 318 319 sort.Strings(zones) 320 321 for _, zone := range zones { 322 clients = append(clients, ZoneClient{Zone: zone, Client: c.clis[zone]}) 323 } 324 325 return clients 326 } 327 328 func newClient(cluster Cluster) (*clientv3.Client, error) { 329 cfg, err := newConfigFromCluster(cryptoRandInt63n, cluster) 330 if err != nil { 331 return nil, err 332 } 333 return clientv3.New(cfg) 334 } 335 336 // rnd is used to set a jitter on the keep alive. 337 func newConfigFromCluster(rnd randInt63N, cluster Cluster) (clientv3.Config, error) { 338 tls, err := cluster.TLSOptions().Config() 339 if err != nil { 340 return clientv3.Config{}, err 341 } 342 343 // Support disabling autosync if a user very explicitly requests it (via negative duration). 
344 autoSyncInterval := cluster.AutoSyncInterval() 345 if autoSyncInterval < 0 { 346 autoSyncInterval = 0 347 } 348 cfg := clientv3.Config{ 349 AutoSyncInterval: autoSyncInterval, 350 DialTimeout: cluster.DialTimeout(), 351 DialOptions: cluster.DialOptions(), 352 Endpoints: cluster.Endpoints(), 353 TLS: tls, 354 MaxCallSendMsgSize: _grpcMaxSendRecvBufferSize, 355 MaxCallRecvMsgSize: _grpcMaxSendRecvBufferSize, 356 } 357 358 if opts := cluster.KeepAliveOptions(); opts.KeepAliveEnabled() { 359 keepAlivePeriod := opts.KeepAlivePeriod() 360 if maxJitter := opts.KeepAlivePeriodMaxJitter(); maxJitter > 0 { 361 jitter, err := rnd(int64(maxJitter)) 362 if err != nil { 363 return clientv3.Config{}, err 364 } 365 keepAlivePeriod += time.Duration(jitter) 366 } 367 cfg.DialKeepAliveTime = keepAlivePeriod 368 cfg.DialKeepAliveTimeout = opts.KeepAliveTimeout() 369 cfg.PermitWithoutStream = true 370 } 371 372 return cfg, nil 373 } 374 375 func (c *csclient) cacheFileFn(extraFields ...string) cacheFileForZoneFn { 376 return func(zone string) etcdkv.CacheFileFn { 377 return func(namespace string) string { 378 if c.opts.CacheDir() == "" { 379 return "" 380 } 381 382 cacheFileFields := make([]string, 0, len(extraFields)+3) 383 cacheFileFields = append(cacheFileFields, namespace, c.opts.Service(), zone) 384 cacheFileFields = append(cacheFileFields, extraFields...) 
385 return filepath.Join(c.opts.CacheDir(), fileName(cacheFileFields...)) 386 } 387 } 388 } 389 390 func fileName(parts ...string) string { 391 // get non-empty parts 392 idx := 0 393 for i, part := range parts { 394 if part == "" { 395 continue 396 } 397 if i != idx { 398 parts[idx] = part 399 } 400 idx++ 401 } 402 parts = parts[:idx] 403 s := strings.Join(parts, cacheFileSeparator) 404 return strings.Replace(s, string(os.PathSeparator), cacheFileSeparator, -1) + cacheFileSuffix 405 } 406 407 func validateTopLevelNamespace(namespace string) error { 408 if namespace == "" || namespace == hierarchySeparator { 409 return errInvalidNamespace 410 } 411 if strings.HasPrefix(namespace, internalPrefix) { 412 // start with _ 413 return errInvalidNamespace 414 } 415 if strings.HasPrefix(namespace, hierarchySeparator+internalPrefix) { 416 return errInvalidNamespace 417 } 418 return nil 419 } 420 421 func (c *csclient) sanitizeOptions(opts kv.OverrideOptions) (kv.OverrideOptions, error) { 422 if opts.Zone() == "" { 423 opts = opts.SetZone(c.opts.Zone()) 424 } 425 426 if opts.Environment() == "" { 427 opts = opts.SetEnvironment(c.opts.Env()) 428 } 429 430 namespace := opts.Namespace() 431 if namespace == "" { 432 return opts.SetNamespace(kvPrefix), nil 433 } 434 435 if err := validateTopLevelNamespace(namespace); err != nil { 436 return nil, err 437 } 438 439 return opts, nil 440 } 441 442 func kvStoreCacheKey(zone string, namespaces ...string) string { 443 parts := make([]string, 0, 1+len(namespaces)) 444 parts = append(parts, zone) 445 for _, ns := range namespaces { 446 if ns != "" { 447 parts = append(parts, ns) 448 } 449 } 450 return strings.Join(parts, hierarchySeparator) 451 } 452 453 // We have a linter which dislikes math.Rand, as it's insecure in the general case. Our usage here is very unlikely 454 // to have security implications, but it won't hurt to make the linter happy. 
// randInt63N is the signature of a random number source taking an upper
// bound n. newConfigFromCluster accepts one as a parameter and newClient
// passes cryptoRandInt63n.
type randInt63N func(n int64) (int64, error)

// cryptoRandInt63n returns a uniformly distributed random value in [0, n)
// read from crypto/rand.
//
// n must be positive. crypto/rand.Int panics for a non-positive bound, so
// that case is reported as an error here instead.
func cryptoRandInt63n(n int64) (int64, error) {
	if n <= 0 {
		return 0, fmt.Errorf("invalid random upper bound %d: must be positive", n)
	}
	r, err := rand.Int(rand.Reader, big.NewInt(n))
	if err != nil {
		return 0, err
	}
	return r.Int64(), nil
}