// Source: github.com/m3db/m3@v1.5.0/src/cluster/kv/etcd/store.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package etcd 22 23 import ( 24 "encoding/json" 25 "errors" 26 "fmt" 27 "os" 28 "path" 29 "sync" 30 31 "github.com/m3db/m3/src/cluster/etcd/watchmanager" 32 "github.com/m3db/m3/src/cluster/kv" 33 xerrors "github.com/m3db/m3/src/x/errors" 34 "github.com/m3db/m3/src/x/retry" 35 36 "github.com/golang/protobuf/proto" 37 "github.com/uber-go/tally" 38 clientv3 "go.etcd.io/etcd/client/v3" 39 "go.uber.org/zap" 40 "golang.org/x/net/context" 41 ) 42 43 const etcdVersionZero = 0 44 45 var ( 46 noopCancel func() 47 emptyCmp clientv3.Cmp 48 emptyOp clientv3.Op 49 errInvalidHistoryVersion = errors.New("invalid version range") 50 errNilPutResponse = errors.New("nil put response from etcd") 51 ) 52 53 // NewStore creates a kv store based on etcd 54 func NewStore(etcdKV *clientv3.Client, opts Options) (kv.TxnStore, error) { 55 scope := opts.InstrumentsOptions().MetricsScope() 56 57 store := &client{ 58 opts: opts, 59 kv: etcdKV, 60 watchables: map[string]kv.ValueWatchable{}, 61 retrier: retry.NewRetrier(opts.RetryOptions()), 62 logger: opts.InstrumentsOptions().Logger(), 63 cacheFile: opts.CacheFileFn()(opts.Prefix()), 64 cache: newCache(), 65 cacheUpdatedCh: make(chan struct{}, 1), 66 m: clientMetrics{ 67 etcdGetError: scope.Counter("etcd-get-error"), 68 etcdPutError: scope.Counter("etcd-put-error"), 69 etcdTnxError: scope.Counter("etcd-tnx-error"), 70 diskWriteError: scope.Counter("disk-write-error"), 71 diskReadError: scope.Counter("disk-read-error"), 72 }, 73 } 74 75 clientWatchOpts := []clientv3.OpOption{ 76 // periodically (appx every 10 mins) checks for the latest data 77 // with or without any update notification 78 clientv3.WithProgressNotify(), 79 // receive initial notification once the watch channel is created 80 clientv3.WithCreatedNotify(), 81 } 82 83 if rev := opts.WatchWithRevision(); rev > 0 { 84 clientWatchOpts = append(clientWatchOpts, clientv3.WithRev(rev)) 85 } 86 87 wOpts := watchmanager.NewOptions(). 88 SetClient(etcdKV). 89 SetUpdateFn(store.update). 
90 SetTickAndStopFn(store.tickAndStop). 91 SetWatchOptions(clientWatchOpts). 92 SetWatchChanCheckInterval(opts.WatchChanCheckInterval()). 93 SetWatchChanInitTimeout(opts.WatchChanInitTimeout()). 94 SetWatchChanResetInterval(opts.WatchChanResetInterval()). 95 SetInstrumentsOptions(opts.InstrumentsOptions()) 96 97 wm, err := watchmanager.NewWatchManager(wOpts) 98 if err != nil { 99 return nil, err 100 } 101 102 store.wm = wm 103 104 if store.cacheFile != "" { 105 if err := store.initCache(opts.NewDirectoryMode()); err != nil { 106 store.logger.Warn("could not load cache from file", zap.String("file", store.cacheFile), zap.Error(err)) 107 } else { 108 store.logger.Info("successfully loaded cache from file", zap.String("file", store.cacheFile)) 109 } 110 111 go func() { 112 for range store.cacheUpdatedCh { 113 if err := store.writeCacheToFile(); err != nil { 114 store.logger.Warn("failed to write cache to file", zap.Error(err)) 115 } 116 } 117 }() 118 } 119 return store, nil 120 } 121 122 type client struct { 123 sync.RWMutex 124 125 opts Options 126 kv *clientv3.Client 127 watchables map[string]kv.ValueWatchable 128 retrier retry.Retrier 129 logger *zap.Logger 130 m clientMetrics 131 cache *valueCache 132 cacheFile string 133 cacheUpdatedCh chan struct{} 134 135 wm watchmanager.WatchManager 136 } 137 138 type clientMetrics struct { 139 etcdGetError tally.Counter 140 etcdPutError tally.Counter 141 etcdTnxError tally.Counter 142 diskWriteError tally.Counter 143 diskReadError tally.Counter 144 } 145 146 // Get returns the latest value from etcd store and only fall back to 147 // in-memory cache if the remote store is unavailable 148 func (c *client) Get(key string) (kv.Value, error) { 149 return c.get(c.opts.ApplyPrefix(key)) 150 } 151 152 func (c *client) get(key string) (kv.Value, error) { 153 ctx, cancel := c.context() 154 defer cancel() 155 156 var opts []clientv3.OpOption 157 if c.opts.EnableFastGets() { 158 opts = append(opts, clientv3.WithSerializable()) 159 } 160 
r, err := c.kv.Get(ctx, key, opts...) 161 if err != nil { 162 c.m.etcdGetError.Inc(1) 163 cachedV, ok := c.getCache(key) 164 if ok { 165 return cachedV, nil 166 } 167 return nil, err 168 } 169 170 if r.Count == 0 { 171 c.deleteCache(key) // delete cache entry if it exists 172 return nil, kv.ErrNotFound 173 } 174 175 v := newValue(r.Kvs[0].Value, r.Kvs[0].Version, r.Kvs[0].ModRevision) 176 177 c.mergeCache(key, v) 178 179 return v, nil 180 } 181 182 func (c *client) History(key string, from, to int) ([]kv.Value, error) { 183 if from > to || from < 0 || to < 0 { 184 return nil, errInvalidHistoryVersion 185 } 186 187 if from == to { 188 return nil, nil 189 } 190 191 newKey := c.opts.ApplyPrefix(key) 192 193 ctx, cancel := c.context() 194 defer cancel() 195 196 r, err := c.kv.Get(ctx, newKey) 197 if err != nil { 198 return nil, err 199 } 200 201 if r.Count == 0 { 202 return nil, kv.ErrNotFound 203 } 204 205 numValue := to - from 206 207 latestKV := r.Kvs[0] 208 version := int(latestKV.Version) 209 modRev := latestKV.ModRevision 210 211 if version < from { 212 // no value available in the requested version range 213 return nil, nil 214 } 215 216 if version-from+1 < numValue { 217 // get the correct size of the result slice 218 numValue = version - from + 1 219 } 220 221 res := make([]kv.Value, numValue) 222 223 if version < to { 224 // put it in the last element of the result 225 res[version-from] = newValue(latestKV.Value, latestKV.Version, modRev) 226 } 227 228 for version > from { 229 ctx, cancel := c.context() 230 defer cancel() 231 232 r, err = c.kv.Get(ctx, newKey, clientv3.WithRev(modRev-1)) 233 if err != nil { 234 return nil, err 235 } 236 237 if r.Count == 0 { 238 // unexpected 239 return nil, fmt.Errorf("could not find version %d for key %s", version-1, key) 240 } 241 242 v := r.Kvs[0] 243 modRev = v.ModRevision 244 version = int(v.Version) 245 if version < to { 246 res[version-from] = newValue(v.Value, v.Version, v.ModRevision) 247 } 248 } 249 250 return res, 
nil 251 } 252 253 func (c *client) processCondition(condition kv.Condition) (clientv3.Cmp, error) { 254 var cmp clientv3.Cmp 255 switch condition.TargetType() { 256 case kv.TargetVersion: 257 cmp = clientv3.Version(c.opts.ApplyPrefix(condition.Key())) 258 default: 259 return emptyCmp, kv.ErrUnknownTargetType 260 } 261 262 var compareStr string 263 switch condition.CompareType() { 264 case kv.CompareEqual: 265 compareStr = condition.CompareType().String() 266 default: 267 return emptyCmp, kv.ErrUnknownCompareType 268 } 269 270 return clientv3.Compare(cmp, compareStr, condition.Value()), nil 271 } 272 273 func (c *client) processOp(op kv.Op) (clientv3.Op, error) { 274 switch op.Type() { 275 case kv.OpSet: 276 opSet := op.(kv.SetOp) 277 278 value, err := proto.Marshal(opSet.Value) 279 if err != nil { 280 return emptyOp, err 281 } 282 283 return clientv3.OpPut( 284 c.opts.ApplyPrefix(opSet.Key()), 285 string(value), 286 clientv3.WithPrevKV(), 287 ), nil 288 default: 289 return emptyOp, kv.ErrUnknownOpType 290 } 291 } 292 293 func (c *client) Commit(conditions []kv.Condition, ops []kv.Op) (kv.Response, error) { 294 ctx, cancel := c.context() 295 defer cancel() 296 297 txn := c.kv.Txn(ctx) 298 299 cmps := make([]clientv3.Cmp, len(conditions)) 300 for i, condition := range conditions { 301 cmp, err := c.processCondition(condition) 302 if err != nil { 303 return nil, err 304 } 305 306 cmps[i] = cmp 307 } 308 309 txn = txn.If(cmps...) 310 311 etcdOps := make([]clientv3.Op, len(ops)) 312 opResponses := make([]kv.OpResponse, len(ops)) 313 for i, op := range ops { 314 etcdOp, err := c.processOp(op) 315 if err != nil { 316 return nil, err 317 } 318 319 etcdOps[i] = etcdOp 320 opResponses[i] = kv.NewOpResponse(op) 321 } 322 323 txn = txn.Then(etcdOps...) 
324 325 r, err := txn.Commit() 326 if err != nil { 327 c.m.etcdTnxError.Inc(1) 328 return nil, err 329 } 330 if !r.Succeeded { 331 return nil, kv.ErrConditionCheckFailed 332 } 333 334 for i := range r.Responses { 335 opr := opResponses[i] 336 switch opr.Type() { 337 case kv.OpSet: 338 res := r.Responses[i].GetResponsePut() 339 if res == nil { 340 return nil, errNilPutResponse 341 } 342 343 if res.PrevKv != nil { 344 opr = opr.SetValue(int(res.PrevKv.Version + 1)) 345 } else { 346 opr = opr.SetValue(etcdVersionZero + 1) 347 } 348 } 349 350 opResponses[i] = opr 351 } 352 353 return kv.NewResponse().SetResponses(opResponses), nil 354 } 355 356 func (c *client) Watch(key string) (kv.ValueWatch, error) { 357 newKey := c.opts.ApplyPrefix(key) 358 c.Lock() 359 watchable, ok := c.watchables[newKey] 360 if !ok { 361 watchable = kv.NewValueWatchable() 362 c.watchables[newKey] = watchable 363 364 go c.wm.Watch(newKey) 365 366 } 367 c.Unlock() 368 _, w, err := watchable.Watch() 369 return w, err 370 } 371 372 func (c *client) getFromKVStore(key string) (kv.Value, error) { 373 var ( 374 nv kv.Value 375 err error 376 ) 377 if execErr := c.retrier.Attempt(func() error { 378 nv, err = c.get(key) 379 if err == kv.ErrNotFound { 380 // do not retry on ErrNotFound 381 return retry.NonRetryableError(err) 382 } 383 return err 384 }); execErr != nil && xerrors.GetInnerNonRetryableError(execErr) != kv.ErrNotFound { 385 return nil, execErr 386 } 387 388 return nv, nil 389 } 390 391 func (c *client) getFromEtcdEvents(key string, events []*clientv3.Event) kv.Value { 392 lastEvent := events[len(events)-1] 393 if lastEvent.Type == clientv3.EventTypeDelete { 394 c.deleteCache(key) 395 return nil 396 } 397 398 nv := newValue(lastEvent.Kv.Value, lastEvent.Kv.Version, lastEvent.Kv.ModRevision) 399 c.mergeCache(key, nv) 400 return nv 401 } 402 403 func (c *client) update(key string, events []*clientv3.Event) error { 404 var nv kv.Value 405 if len(events) == 0 { 406 var err error 407 if nv, err = 
c.getFromKVStore(key); err != nil { 408 // This is triggered by initializing a new watch and no value available for the key. 409 return nil 410 } 411 } else { 412 nv = c.getFromEtcdEvents(key, events) 413 } 414 415 c.RLock() 416 w, ok := c.watchables[key] 417 c.RUnlock() 418 if !ok { 419 return fmt.Errorf("unexpected: no watchable found for key: %s", key) 420 } 421 422 curValue := w.Get() 423 424 // Both current and new are nil. 425 if curValue == nil && nv == nil { 426 return nil 427 } 428 429 if nv == nil { 430 // At deletion, just update the watch to nil. 431 return w.Update(nil) 432 } 433 434 if curValue == nil || nv.IsNewer(curValue) { 435 return w.Update(nv) 436 } 437 438 return nil 439 } 440 441 func (c *client) tickAndStop(key string) bool { 442 // fast path 443 c.RLock() 444 watchable, ok := c.watchables[key] 445 c.RUnlock() 446 if !ok { 447 c.logger.Warn("unexpected: key is already cleaned up", zap.String("key", key)) 448 return true 449 } 450 451 if watchable.NumWatches() != 0 { 452 return false 453 } 454 455 // slow path 456 c.Lock() 457 defer c.Unlock() 458 watchable, ok = c.watchables[key] 459 if !ok { 460 // not expect this to happen 461 c.logger.Warn("unexpected: key is already cleaned up", zap.String("key", key)) 462 return true 463 } 464 465 if watchable.NumWatches() != 0 { 466 // a new watch has subscribed to the watchable, do not clean up 467 return false 468 } 469 470 watchable.Close() 471 delete(c.watchables, key) 472 return true 473 } 474 475 func (c *client) Set(key string, v proto.Message) (int, error) { 476 ctx, cancel := c.context() 477 defer cancel() 478 479 value, err := proto.Marshal(v) 480 if err != nil { 481 return 0, err 482 } 483 484 r, err := c.kv.Put(ctx, c.opts.ApplyPrefix(key), string(value), clientv3.WithPrevKV()) 485 if err != nil { 486 c.m.etcdPutError.Inc(1) 487 return 0, err 488 } 489 490 // if there is no prev kv, means this is the first version of the key 491 if r.PrevKv == nil { 492 return etcdVersionZero + 1, nil 493 } 
494 495 return int(r.PrevKv.Version + 1), nil 496 } 497 498 func (c *client) SetIfNotExists(key string, v proto.Message) (int, error) { 499 version, err := c.CheckAndSet(key, etcdVersionZero, v) 500 if err == kv.ErrVersionMismatch { 501 err = kv.ErrAlreadyExists 502 } 503 return version, err 504 } 505 506 func (c *client) CheckAndSet(key string, version int, v proto.Message) (int, error) { 507 ctx, cancel := c.context() 508 defer cancel() 509 510 value, err := proto.Marshal(v) 511 if err != nil { 512 return 0, err 513 } 514 515 key = c.opts.ApplyPrefix(key) 516 r, err := c.kv.Txn(ctx). 517 If(clientv3.Compare(clientv3.Version(key), kv.CompareEqual.String(), version)). 518 Then(clientv3.OpPut(key, string(value))). 519 Commit() 520 if err != nil { 521 c.m.etcdTnxError.Inc(1) 522 return 0, err 523 } 524 if !r.Succeeded { 525 return 0, kv.ErrVersionMismatch 526 } 527 528 return version + 1, nil 529 } 530 531 func (c *client) Delete(key string) (kv.Value, error) { 532 ctx, cancel := c.context() 533 defer cancel() 534 535 key = c.opts.ApplyPrefix(key) 536 537 r, err := c.kv.Delete(ctx, key, clientv3.WithPrevKV()) 538 if err != nil { 539 return nil, err 540 } 541 542 if r.Deleted == 0 { 543 return nil, kv.ErrNotFound 544 } 545 546 prevKV := newValue(r.PrevKvs[0].Value, r.PrevKvs[0].Version, r.PrevKvs[0].ModRevision) 547 548 c.deleteCache(key) 549 550 return prevKV, nil 551 } 552 553 func (c *client) deleteCache(key string) { 554 c.cache.Lock() 555 defer c.cache.Unlock() 556 557 // only do a delete if we actually need to 558 _, found := c.cache.Values[key] 559 if !found { 560 return 561 } 562 563 delete(c.cache.Values, key) 564 c.notifyCacheUpdate() 565 } 566 567 func (c *client) getCache(key string) (kv.Value, bool) { 568 c.cache.RLock() 569 v, ok := c.cache.Values[key] 570 c.cache.RUnlock() 571 572 return v, ok 573 } 574 575 func (c *client) mergeCache(key string, v *value) { 576 c.cache.Lock() 577 578 cur, ok := c.cache.Values[key] 579 if !ok || v.IsNewer(cur) { 580 
c.cache.Values[key] = v 581 c.notifyCacheUpdate() 582 } 583 584 c.cache.Unlock() 585 } 586 587 func (c *client) notifyCacheUpdate() { 588 // notify that cached data is updated 589 select { 590 case c.cacheUpdatedCh <- struct{}{}: 591 default: 592 } 593 } 594 595 func (c *client) writeCacheToFile() error { 596 file, err := os.Create(c.cacheFile) 597 if err != nil { 598 c.m.diskWriteError.Inc(1) 599 c.logger.Warn("error creating cache file", zap.String("file", c.cacheFile), zap.Error(err)) 600 return fmt.Errorf("invalid cache file: %s", c.cacheFile) 601 } 602 603 encoder := json.NewEncoder(file) 604 c.cache.RLock() 605 err = encoder.Encode(c.cache) 606 c.cache.RUnlock() 607 608 if err != nil { 609 c.m.diskWriteError.Inc(1) 610 c.logger.Warn("error encoding values", zap.Error(err)) 611 return err 612 } 613 614 if err = file.Close(); err != nil { 615 c.m.diskWriteError.Inc(1) 616 c.logger.Warn("error closing cache file", zap.String("file", c.cacheFile), zap.Error(err)) 617 } 618 619 return nil 620 } 621 622 func (c *client) createCacheDir(fm os.FileMode) error { 623 path := path.Dir(c.opts.CacheFileFn()(c.opts.Prefix())) 624 if err := os.MkdirAll(path, fm); err != nil { 625 c.m.diskWriteError.Inc(1) 626 c.logger.Warn("error creating cache directory", 627 zap.String("path", path), 628 zap.Error(err), 629 ) 630 return err 631 } 632 633 c.logger.Info("successfully created new cache dir", 634 zap.String("path", path), 635 zap.Int("mode", int(fm)), 636 ) 637 638 return nil 639 } 640 641 func (c *client) initCache(fm os.FileMode) error { 642 if err := c.createCacheDir(fm); err != nil { 643 c.m.diskWriteError.Inc(1) 644 return fmt.Errorf("error creating cache directory: %s", err) 645 } 646 file, err := os.Open(c.cacheFile) 647 if err != nil { 648 c.m.diskReadError.Inc(1) 649 return fmt.Errorf("error opening cache file %s: %v", c.cacheFile, err) 650 } 651 652 // Read bootstrap file 653 decoder := json.NewDecoder(file) 654 655 if err := decoder.Decode(c.cache); err != nil { 656 
c.m.diskReadError.Inc(1) 657 return fmt.Errorf("error reading cache file %s: %v", c.cacheFile, err) 658 } 659 660 return nil 661 } 662 663 func (c *client) context() (context.Context, context.CancelFunc) { 664 ctx := context.Background() 665 cancel := noopCancel 666 if c.opts.RequestTimeout() > 0 { 667 ctx, cancel = context.WithTimeout(ctx, c.opts.RequestTimeout()) 668 } 669 670 return ctx, cancel 671 } 672 673 type valueCache struct { 674 sync.RWMutex 675 676 Values map[string]*value `json:"values"` 677 } 678 679 func newCache() *valueCache { 680 return &valueCache{Values: make(map[string]*value)} 681 } 682 683 type value struct { 684 Val []byte `json:"value"` 685 Ver int64 `json:"version"` 686 Rev int64 `json:"revision"` 687 } 688 689 func newValue(val []byte, ver, rev int64) *value { 690 return &value{ 691 Val: val, 692 Ver: ver, 693 Rev: rev, 694 } 695 } 696 697 func (c *value) IsNewer(other kv.Value) bool { 698 othervalue, ok := other.(*value) 699 if ok { 700 return c.Rev > othervalue.Rev 701 } 702 703 return c.Version() > other.Version() 704 } 705 706 func (c *value) Unmarshal(v proto.Message) error { 707 err := proto.Unmarshal(c.Val, v) 708 709 return err 710 } 711 712 func (c *value) Version() int { 713 return int(c.Ver) 714 }