// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package allocator

import (
	"context"
	"errors"
	"fmt"

	"github.com/google/uuid"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/backoff"
	"github.com/cilium/cilium/pkg/idpool"
	"github.com/cilium/cilium/pkg/inctimer"
	"github.com/cilium/cilium/pkg/kvstore"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/rate"
	"github.com/cilium/cilium/pkg/time"
)

var (
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "allocator")
)

const (
	// maxAllocAttempts is the number of attempted allocation requests
	// performed before failing.
	maxAllocAttempts = 16
)

// Allocator is a distributed ID allocator backed by a KVstore. It maps
// arbitrary keys to identifiers. Multiple users on different cluster nodes can
// request the ID for keys in parallel and are guaranteed to retrieve the same
// ID for an identical key.
//
// While the details of how keys are stored are delegated to Backend
// implementations, some expectations exist. See pkg/kvstore/allocator for
// details about the kvstore implementation.
//
// A node takes a reference to an identity when it is in use on that node, and
// the identity remains in use if there is any node reference to it. When an
// identity no longer has any node references, it may be garbage collected. No
// guarantees are made at that point and the numeric identity may be reused.
// Note that the numeric IDs are selected locally and verified with the Backend.
//
// Lookup ID by key:
//  1. Return ID from local cache updated by watcher (no Backend interactions)
//  2. Do ListPrefix() on the slave key excluding the node suffix, return the
//     first result that matches the exact prefix.
//
// Lookup key by ID:
//  1. Return key from local cache updated by watcher (no Backend interactions)
//  2. Do Get() on the master key, return the result
//
// Allocate:
//  1. Check the local key cache; if the key is found, increment the reference
//     count and return (no Backend interactions)
//  2. Look up the ID by key in the local cache updated by the watcher, or via
//     the first slave key found in the Backend; if...
//
// ... match found:
//
//	2.1 Create a new slave key. This operation is potentially racy as the master
//	    key can be removed in the meantime.
//	    - etcd: Create is made conditional on existence of the master key
//	    - consul: locking
//
// ... match not found:
//
//	2.1 Select a new unused ID from the local cache
//	2.2 Create a new master key with the condition that it may not exist
//	2.3 Create a new slave key
//
// Release:
//  1. Reduce the local reference count until the last use (no Backend interactions)
//  2. Delete the slave key (basePath/value/key1/node1)
//     This automatically guarantees that when the last node has released the
//     key, the key is no longer found by Get()
//  3. If the node goes down, all slave keys of that node are removed after
//     the TTL expires (auto release).
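//
// For illustration only (the concrete key layout is owned by the Backend, see
// pkg/kvstore/allocator; the paths below are a sketch rather than a contract),
// the kvstore contents for a key "key1" that has been allocated ID 42 and is
// referenced by two nodes could look like:
//
//	basePath/id/42            -> key1   (master key: ID -> key)
//	basePath/value/key1/node1 -> 42     (slave key: node1 holds a reference)
//	basePath/value/key1/node2 -> 42     (slave key: node2 holds a reference)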
type Allocator struct {
	// events is a channel which will receive AllocatorEvent as IDs are
	// added, modified or removed from the allocator
	events AllocatorEventSendChan

	// keyType is an instance of the type to be used as allocator key.
	keyType AllocatorKey

	// min is the lower limit when allocating IDs. The allocator will never
	// allocate an ID less than this value.
	min idpool.ID

	// max is the upper limit when allocating IDs. The allocator will never
	// allocate an ID greater than this value.
	max idpool.ID

	// prefixMask, if set, is ORed into all selected IDs prior to
	// allocation
	prefixMask idpool.ID

	// localKeys contains all keys including their reference count for keys
	// which have been allocated and are in local use
	localKeys *localKeys

	// suffix is the suffix attached to keys which must be node specific;
	// this is typically set to the node's IP address
	suffix string

	// backoffTemplate is the backoff configuration while allocating
	backoffTemplate backoff.Exponential

	// slaveKeysMutex protects the concurrent access of the slave key by this
	// agent.
	slaveKeysMutex lock.Mutex

	// mainCache is the main cache, representing the allocator contents of
	// the primary kvstore connection
	mainCache cache

	// remoteCachesMutex protects access to remoteCaches
	remoteCachesMutex lock.RWMutex

	// remoteCaches is the list of additional remote caches being watched
	// in addition to the main cache
	remoteCaches map[string]*RemoteCache

	// stopGC is the channel used to stop the garbage collector
	stopGC chan struct{}

	// initialListDone is a channel that is closed when the initial
	// synchronization has completed
	initialListDone waitChan

	// idPool maintains a pool of available ids for allocation.
	idPool *idpool.IDPool

	// enableMasterKeyProtection if true, causes master keys that are still in
	// local use to be automatically re-created
	enableMasterKeyProtection bool

	// disableGC disables the garbage collector
	disableGC bool

	// disableAutostart prevents starting the allocator when it is initialized
	disableAutostart bool

	// cacheValidators implement extra validations of retrieved identities, e.g.,
	// to ensure that they belong to the expected range.
	cacheValidators []CacheValidator

	// backend is the upstream, shared backend to which we synchronize local
	// information
	backend Backend
}

// AllocatorOption is the base type for allocator options
type AllocatorOption func(*Allocator)
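
// Options follow the functional-options pattern: each AllocatorOption mutates
// the Allocator during construction. As a sketch only (WithSuffix is a
// hypothetical option, not part of this package), an additional option could
// be defined within this package as:
//
//	func WithSuffix(s string) AllocatorOption {
//		return func(a *Allocator) { a.suffix = s }
//	}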

// CacheValidator is the type of the validation functions triggered to filter out
// invalid notification events.
type CacheValidator func(kind AllocatorChangeKind, id idpool.ID, key AllocatorKey) error

// NewAllocatorForGC returns an allocator that can be used to run RunGC().
//
// The allocator can be configured by passing in additional options:
//   - WithMin(id) - minimum ID to allocate (default: 1)
//   - WithMax(id) - maximum ID to allocate (default: max(uint64))
func NewAllocatorForGC(backend Backend, opts ...AllocatorOption) *Allocator {
	a := &Allocator{
		backend: backend,
		min:     idpool.ID(1),
		max:     idpool.ID(^uint64(0)),
	}

	for _, fn := range opts {
		fn(a)
	}

	return a
}

// GCStats contains the result statistics of a garbage collection run.
type GCStats struct {
	// Alive is the number of identities alive
	Alive int

	// Deleted is the number of identities deleted
	Deleted int
}
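
// A minimal sketch of an operator-style GC loop built on NewAllocatorForGC.
// The backend and the rate limiter are assumptions constructed elsewhere; only
// RunGC and the staleKeys bookkeeping come from this package:
//
//	a := NewAllocatorForGC(backend)
//	staleKeys := map[string]uint64{}
//	for {
//		var err error
//		staleKeys, _, err = a.RunGC(limiter, staleKeys)
//		if err != nil {
//			// log and retry on the next interval
//		}
//		// ... wait for the next GC interval ...
//	}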

// Backend represents clients to remote ID allocation systems, such as KV
// stores. These are used to coordinate key->ID allocation between cilium
// nodes.
type Backend interface {
	// DeleteAllKeys will delete all keys. It is used in tests.
	DeleteAllKeys(ctx context.Context)

	// Encode encodes a key string as required to conform to the key
	// restrictions of the backend
	Encode(string) string

	// AllocateID creates a new key->ID association. This is expected to be a
	// create-only operation, and the ID may be allocated by another node. An
	// error in that case is not expected to be fatal. The actual ID is obtained
	// by Allocator from the local idPool, which is updated with used-IDs as the
	// Backend makes calls to the handler in ListAndWatch.
	// The implementation of the backend might return an AllocatorKey that is
	// a copy of 'key' with an internal reference of the backend key or, if it
	// doesn't use the internal reference of the backend key, it simply returns
	// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
	AllocateID(ctx context.Context, id idpool.ID, key AllocatorKey) (AllocatorKey, error)

	// AllocateIDIfLocked behaves like AllocateID but when lock is non-nil the
	// operation proceeds only if it is still valid.
	// The implementation of the backend might return an AllocatorKey that is
	// a copy of 'key' with an internal reference of the backend key or, if it
	// doesn't use the internal reference of the backend key, it simply returns
	// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
	AllocateIDIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) (AllocatorKey, error)

	// AcquireReference records that this node is using this key->ID mapping.
	// This is distinct from any reference counting within this agent; only one
	// reference exists for this node for any number of managed endpoints using
	// it.
	// The semantics of cleaning up stale references is delegated to the Backend
	// implementation. RunGC may need to be invoked.
	// This can race, and so a lock can be provided (via a Lock call, below).
	AcquireReference(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) error

	// Release releases the use of an ID associated with the provided key. It
	// does not guard against concurrent releases of the same key.
	Release(ctx context.Context, id idpool.ID, key AllocatorKey) (err error)

	// UpdateKey refreshes the record that this node is using this key -> id
	// mapping. When reliablyMissing is set it will also recreate missing master or
	// slave keys.
	UpdateKey(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool) error

	// UpdateKeyIfLocked behaves like UpdateKey but when lock is non-nil the operation proceeds only if it is still valid.
	UpdateKeyIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool, lock kvstore.KVLocker) error

	// Get returns the allocated ID for this key as seen by the Backend. This may
	// have been created by other agents.
	Get(ctx context.Context, key AllocatorKey) (idpool.ID, error)

	// GetIfLocked behaves like Get, but when lock is non-nil the
	// operation proceeds only if it is still valid.
	GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error)

	// GetByID returns the key associated with this ID, as seen by the Backend.
	// This may have been created by other agents.
	GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error)

	// Lock provides an opaque lock object that can be used, later, to ensure
	// that the key has not changed since the lock was created. This can be done
	// with GetIfLocked.
	Lock(ctx context.Context, key AllocatorKey) (kvstore.KVLocker, error)

	// ListAndWatch begins synchronizing the local Backend instance with its
	// remote.
	ListAndWatch(ctx context.Context, handler CacheMutations, stopChan chan struct{})

	// RunGC reaps stale or unused identities within the Backend and makes them
	// available for reuse. It is used by the cilium-operator and is not invoked
	// by cilium-agent.
	// Note: not all Backend implementations rely on this; the kvstore
	// backends, for example, may use leases to expire keys instead.
	RunGC(ctx context.Context, rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64, minID idpool.ID, maxID idpool.ID) (map[string]uint64, *GCStats, error)

	// RunLocksGC reaps stale or unused locks within the Backend. It is used by
	// the cilium-operator and is not invoked by cilium-agent. Returns
	// a map of locks currently being held in the KVStore including the ones
	// that failed to be GCed.
	// Note: not all Backend implementations rely on this; the kvstore
	// backends, for example, may use leases to expire keys instead.
	RunLocksGC(ctx context.Context, staleKeysPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error)

	// Status returns a human-readable status of the Backend.
	Status() (string, error)
}
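
// How the locked variants are meant to compose (a sketch mirroring
// lockedAllocate below; error handling is elided and 'backend', 'key' and
// 'newID' are placeholders):
//
//	kvLock, _ := backend.Lock(ctx, key)
//	defer kvLock.Unlock(context.Background())
//
//	id, _ := backend.GetIfLocked(ctx, key, kvLock) // result valid only while kvLock is valid
//	if id == idpool.NoID {
//		_, _ = backend.AllocateIDIfLocked(ctx, newID, key, kvLock)
//		_ = backend.AcquireReference(ctx, newID, key, kvLock)
//	}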

// NewAllocator creates a new Allocator. Any type can be used as key as long as
// the type implements the AllocatorKey interface. A variable of the type has
// to be passed into NewAllocator() to make the type known. The specified base
// path is used to prefix all keys in the kvstore. The provided path must be
// unique.
//
// The allocator can be configured by passing in additional options:
//   - WithEvents() - enable the events channel
//   - WithMin(id) - minimum ID to allocate (default: 1)
//   - WithMax(id) - maximum ID to allocate (default: max(uint64))
//
// After creation, IDs can be allocated with Allocate() and released with
// Release().
func NewAllocator(typ AllocatorKey, backend Backend, opts ...AllocatorOption) (*Allocator, error) {
	a := &Allocator{
		keyType:      typ,
		backend:      backend,
		min:          idpool.ID(1),
		max:          idpool.ID(^uint64(0)),
		localKeys:    newLocalKeys(),
		stopGC:       make(chan struct{}),
		suffix:       uuid.New().String()[:10],
		remoteCaches: map[string]*RemoteCache{},
		backoffTemplate: backoff.Exponential{
			Min:    time.Duration(20) * time.Millisecond,
			Factor: 2.0,
		},
	}

	for _, fn := range opts {
		fn(a)
	}

	a.mainCache = newCache(a)

	if a.suffix == "<nil>" {
		return nil, errors.New("allocator suffix is <nil> and unlikely unique")
	}

	if a.min < 1 {
		return nil, errors.New("minimum ID must be >= 1")
	}

	if a.max <= a.min {
		return nil, fmt.Errorf("maximum ID must be greater than minimum ID: configured max %v, min %v", a.max, a.min)
	}

	a.idPool = idpool.NewIDPool(a.min, a.max)

	if !a.disableAutostart {
		a.start()
	}

	return a, nil
}

func (a *Allocator) start() {
	a.initialListDone = a.mainCache.start()
	if !a.disableGC {
		go func() {
			select {
			case <-a.initialListDone:
			case <-time.After(option.Config.AllocatorListTimeout):
				log.Fatalf("Timeout while waiting for initial allocator state")
			}
			a.startLocalKeySync()
		}()
	}
}

// WithBackend sets this allocator to use backend. It is expected to be used at
// initialization.
func WithBackend(backend Backend) AllocatorOption {
	return func(a *Allocator) {
		a.backend = backend
	}
}

// WithEvents enables receiving of events.
//
// CAUTION: When using this option, the provided channel must be continuously
// read while NewAllocator() is being called to ensure that the channel does
// not block indefinitely while NewAllocator() emits events on it while
// populating the initial cache.
func WithEvents(events AllocatorEventSendChan) AllocatorOption {
	return func(a *Allocator) { a.events = events }
}

// WithMin sets the minimum identifier to be allocated
func WithMin(id idpool.ID) AllocatorOption {
	return func(a *Allocator) { a.min = id }
}

// WithMax sets the maximum identifier to be allocated
func WithMax(id idpool.ID) AllocatorOption {
	return func(a *Allocator) { a.max = id }
}
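
// Construction sketch. The concrete key type and backend are assumptions (any
// AllocatorKey implementation and Backend will do); the drain goroutine is
// what the WithEvents CAUTION above requires:
//
//	events := make(AllocatorEventChan, 1024)
//	go func() {
//		for ev := range events {
//			// react to ev.Typ / ev.ID / ev.Key
//		}
//	}()
//
//	a, err := NewAllocator(keyType, backend,
//		WithEvents(events), WithMin(idpool.ID(256)), WithMax(idpool.ID(65535)))
//	if err != nil {
//		// handle construction error
//	}
//	defer a.Delete()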

// WithPrefixMask sets the prefix used for all ID allocations. If set, the mask
// will be ORed into all selected IDs prior to allocation. It is the
// responsibility of the caller to ensure that the mask does not conflict with
// min..max.
func WithPrefixMask(mask idpool.ID) AllocatorOption {
	return func(a *Allocator) { a.prefixMask = mask }
}

// WithMasterKeyProtection will watch for delete events on master keys and
// re-create them if local usage suggests that the key is still in use
func WithMasterKeyProtection() AllocatorOption {
	return func(a *Allocator) { a.enableMasterKeyProtection = true }
}

// WithoutGC disables the use of the garbage collector
func WithoutGC() AllocatorOption {
	return func(a *Allocator) { a.disableGC = true }
}

// WithoutAutostart prevents starting the allocator when it is initialized
func WithoutAutostart() AllocatorOption {
	return func(a *Allocator) { a.disableAutostart = true }
}

// WithCacheValidator registers a validator triggered for each identity
// notification event to filter out invalid IDs and keys.
func WithCacheValidator(validator CacheValidator) AllocatorOption {
	return func(a *Allocator) { a.cacheValidators = append(a.cacheValidators, validator) }
}

// GetEvents returns the events channel given to the allocator when
// constructed.
// Note: This channel is not owned by the allocator!
func (a *Allocator) GetEvents() AllocatorEventSendChan {
	return a.events
}

// Delete deletes an allocator and stops the garbage collector
func (a *Allocator) Delete() {
	close(a.stopGC)
	a.mainCache.stop()
}

// WaitForInitialSync waits until the initial sync is complete
func (a *Allocator) WaitForInitialSync(ctx context.Context) error {
	select {
	case <-a.initialListDone:
	case <-ctx.Done():
		return fmt.Errorf("identity sync was cancelled: %w", ctx.Err())
	}

	return nil
}

// RangeFunc is the function called by RangeCache
type RangeFunc func(idpool.ID, AllocatorKey)

// ForeachCache iterates over the allocator cache and calls RangeFunc on each
// cached entry
func (a *Allocator) ForeachCache(cb RangeFunc) {
	a.mainCache.foreach(cb)

	a.remoteCachesMutex.RLock()
	for _, rc := range a.remoteCaches {
		rc.cache.foreach(cb)
	}
	a.remoteCachesMutex.RUnlock()
}

// selectAvailableID selects an available ID.
// Returns a triple of the selected ID ORed with prefixMask, the ID string and
// the originally selected ID.
func (a *Allocator) selectAvailableID() (idpool.ID, string, idpool.ID) {
	if id := a.idPool.LeaseAvailableID(); id != idpool.NoID {
		unmaskedID := id
		id |= a.prefixMask
		return id, id.String(), unmaskedID
	}

	return 0, "", 0
}
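
// The relationship between the two IDs returned above, as a sketch: the masked
// value is what callers and the Backend see, while the idPool only tracks the
// unmasked value.
//
//	masked := unmasked | a.prefixMask // handed out via Allocate()
//	a.idPool.Release(unmasked)        // the pool must be addressed by the unmasked ID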

// AllocatorKey is the interface to implement in order for a type to be used as
// key for the allocator. The key's data is assumed to be a collection of
// pkg/label.Label, and the functions reflect this somewhat.
type AllocatorKey interface {
	fmt.Stringer

	// GetKey returns the canonical string representation of the key
	GetKey() string

	// PutKey stores the information in v into the key. This is the inverse
	// operation to GetKey
	PutKey(v string) AllocatorKey

	// GetAsMap returns the key as a collection of "labels" with a key and value.
	// This is the inverse operation to PutKeyFromMap.
	GetAsMap() map[string]string

	// PutKeyFromMap stores the labels in v into the key to be used later. This
	// is the inverse operation to GetAsMap.
	PutKeyFromMap(v map[string]string) AllocatorKey

	// PutValue puts metadata inside the global identity for the given 'key' with
	// the given 'value'.
	PutValue(key any, value any) AllocatorKey

	// Value returns the value stored in the metadata map.
	Value(key any) any
}
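
// A minimal sketch of an AllocatorKey implementation backed by a single label.
// labelKey is hypothetical and not used anywhere in Cilium (the real
// implementation is the identity key built on pkg/labels); strings.Cut is from
// the standard library and metadata handling is elided:
//
//	type labelKey struct{ k, v string }
//
//	func (l labelKey) String() string { return l.k + "=" + l.v }
//	func (l labelKey) GetKey() string { return l.k + "=" + l.v }
//	func (l labelKey) PutKey(s string) AllocatorKey {
//		k, v, _ := strings.Cut(s, "=")
//		return labelKey{k, v}
//	}
//	func (l labelKey) GetAsMap() map[string]string { return map[string]string{l.k: l.v} }
//	func (l labelKey) PutKeyFromMap(m map[string]string) AllocatorKey {
//		for k, v := range m {
//			return labelKey{k, v}
//		}
//		return labelKey{}
//	}
//	func (l labelKey) PutValue(key, value any) AllocatorKey { return l } // metadata elided
//	func (l labelKey) Value(key any) any                    { return nil }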

func (a *Allocator) encodeKey(key AllocatorKey) string {
	return a.backend.Encode(key.GetKey())
}

// Return values:
//  1. allocated ID
//  2. whether the ID is newly allocated from the kvstore
//  3. whether this is the first owner that holds a reference to the key in
//     the localkeys store
//  4. error in case of failure
func (a *Allocator) lockedAllocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
	var firstUse bool

	kvstore.Trace("Allocating key in kvstore", nil, logrus.Fields{fieldKey: key})

	k := a.encodeKey(key)
	lock, err := a.backend.Lock(ctx, key)
	if err != nil {
		return 0, false, false, err
	}

	defer lock.Unlock(context.Background())

	// fetch first key that matches /value/<key> while ignoring the
	// node suffix
	value, err := a.GetIfLocked(ctx, key, lock)
	if err != nil {
		return 0, false, false, err
	}

	kvstore.Trace("kvstore state is: ", nil, logrus.Fields{fieldID: value})

	a.slaveKeysMutex.Lock()
	defer a.slaveKeysMutex.Unlock()

	// The fact that the master key does not exist in the kvstore does not
	// mean that localKeys does not have it. The KVStore might have lost all
	// of its data but the local agent still holds a reference for the given
	// master key.
	if value == 0 {
		value = a.localKeys.lookupKey(k)
		if value != 0 {
			// re-create master key
			if err := a.backend.UpdateKeyIfLocked(ctx, value, key, true, lock); err != nil {
				return 0, false, false, fmt.Errorf("unable to re-create missing master key '%s': %s while allocating ID: %w", key, value, err)
			}
		}
	} else {
		_, firstUse, err = a.localKeys.allocate(k, key, value)
		if err != nil {
			return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %w", k, err)
		}

		if firstUse {
			log.WithField(fieldKey, k).Debug("Reserved new local key")
		} else {
			log.WithField(fieldKey, k).Debug("Reusing existing local key")
		}
	}

	if value != 0 {
		log.WithField(fieldKey, k).Info("Reusing existing global key")

		if err = a.backend.AcquireReference(ctx, value, key, lock); err != nil {
			a.localKeys.release(k)
			return 0, false, false, fmt.Errorf("unable to create secondary key '%s': %w", k, err)
		}

		// mark the key as verified in the local cache
		if err := a.localKeys.verify(k); err != nil {
			log.WithError(err).Error("BUG: Unable to verify local key")
		}

		return value, false, firstUse, nil
	}

	log.WithField(fieldKey, k).Debug("Allocating new master ID")
	id, strID, unmaskedID := a.selectAvailableID()
	if id == 0 {
		return 0, false, false, fmt.Errorf("no more available IDs in configured space")
	}

	kvstore.Trace("Selected available key ID", nil, logrus.Fields{fieldID: id})

	releaseKeyAndID := func() {
		a.localKeys.release(k)
		a.idPool.Release(unmaskedID) // This returns this ID to be re-used for other keys
	}

	oldID, firstUse, err := a.localKeys.allocate(k, key, id)
	if err != nil {
		a.idPool.Release(unmaskedID)
		return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %w", k, err)
	}

	// Another local writer beat us to allocating an ID for the same key,
	// start over
	if id != oldID {
		releaseKeyAndID()
		return 0, false, false, fmt.Errorf("another writer has allocated key %s", k)
	}

	// Check that this key has not been allocated in the cluster during our
	// operation here
	value, err = a.GetNoCache(ctx, key)
	if err != nil {
		releaseKeyAndID()
		return 0, false, false, err
	}
	if value != 0 {
		releaseKeyAndID()
		return 0, false, false, fmt.Errorf("Found master key after proceeding with new allocation for %s", k)
	}

	// Copy 'key' into 'key2' before overwriting 'key' below: in case of an
	// error AllocateIDIfLocked returns a nil AllocatorKey, and the error
	// message still needs the original value.
	key2 := key
	key, err = a.backend.AllocateIDIfLocked(ctx, id, key2, lock)
	if err != nil {
		// Creation failed. Another agent most likely beat us to allocating this
		// ID, retry.
		releaseKeyAndID()
		return 0, false, false, fmt.Errorf("unable to allocate ID %s for key %s: %w", strID, key2, err)
	}

	// Notify the pool that the leased ID is now in use.
	a.idPool.Use(unmaskedID)

	if err = a.backend.AcquireReference(ctx, id, key, lock); err != nil {
		// We will leak the master key here as the key has already been
		// exposed and may be in use by other nodes. The garbage
		// collector will release it again.
		releaseKeyAndID()
		return 0, false, false, fmt.Errorf("secondary key creation failed '%s': %w", k, err)
	}

	// mark the key as verified in the local cache
	if err := a.localKeys.verify(k); err != nil {
		log.WithError(err).Error("BUG: Unable to verify local key")
	}

	log.WithField(fieldKey, k).Info("Allocated new global key")

	return id, true, firstUse, nil
}

// Allocate will retrieve the ID for the provided key. If no ID has been
// allocated for this key yet, a key will be allocated. If allocation fails,
// most likely due to a parallel allocation of the same ID by another user,
// allocation is re-attempted up to maxAllocAttempts times.
//
// Return values:
//  1. allocated ID
//  2. whether the ID is newly allocated from the kvstore
//  3. whether this is the first owner that holds a reference to the key in
//     the localkeys store
//  4. error in case of failure
func (a *Allocator) Allocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
	var (
		err      error
		value    idpool.ID
		isNew    bool
		firstUse bool
		k        = a.encodeKey(key)
	)

	log.WithField(fieldKey, key).Debug("Allocating key")

	select {
	case <-a.initialListDone:
	case <-ctx.Done():
		return 0, false, false, fmt.Errorf("allocation was cancelled while waiting for initial key list to be received: %w", ctx.Err())
	}

	kvstore.Trace("Allocating from kvstore", nil, logrus.Fields{fieldKey: key})

	// make a copy of the template and customize it
	boff := a.backoffTemplate
	boff.Name = key.String()

	for attempt := 0; attempt < maxAllocAttempts; attempt++ {
		// Check our list of local keys already in use and increment the
		// refcnt. The returned key must be released afterwards. No kvstore
		// operation was performed for this allocation.
		// We also do this on every loop as a different Allocate call might have
		// allocated the key while we are attempting to allocate in this
		// execution thread. It does not hurt to check if localKeys contains a
		// reference for the key that we are attempting to allocate.
		if val := a.localKeys.use(k); val != idpool.NoID {
			kvstore.Trace("Reusing local id", nil, logrus.Fields{fieldID: val, fieldKey: key})
			a.mainCache.insert(key, val)
			return val, false, false, nil
		}

		// FIXME: Add non-locking variant
		value, isNew, firstUse, err = a.lockedAllocate(ctx, key)
		if err == nil {
			a.mainCache.insert(key, value)
			log.WithField(fieldKey, key).WithField(fieldID, value).Debug("Allocated key")
			return value, isNew, firstUse, nil
		}

		scopedLog := log.WithFields(logrus.Fields{
			fieldKey:          key,
			logfields.Attempt: attempt,
		})

		select {
		case <-ctx.Done():
			scopedLog.WithError(ctx.Err()).Warning("Ongoing key allocation has been cancelled")
			return 0, false, false, fmt.Errorf("key allocation cancelled: %w", ctx.Err())
		default:
			scopedLog.WithError(err).Warning("Key allocation attempt failed")
		}

		kvstore.Trace("Allocation attempt failed", err, logrus.Fields{fieldKey: key, logfields.Attempt: attempt})

		if waitErr := boff.Wait(ctx); waitErr != nil {
			return 0, false, false, waitErr
		}
	}

	return 0, false, false, err
}
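
// Usage sketch: every successful Allocate must eventually be paired with a
// Release of the same key; the ID only becomes eligible for garbage collection
// once the last node reference is gone ('a', 'ctx' and 'key' are placeholders):
//
//	id, isNew, firstUse, err := a.Allocate(ctx, key)
//	if err != nil {
//		// give up or retry at a higher level
//	}
//	// isNew:    the master key was created by this call
//	// firstUse: this agent took its first local reference to the key
//	defer a.Release(context.Background(), key)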

// GetIfLocked returns the ID which is allocated to a key. Returns NoID if no
// ID has been allocated to this key yet. The result is only valid if the
// client is still holding the given lock.
func (a *Allocator) GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error) {
	if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
		return id, nil
	}

	return a.backend.GetIfLocked(ctx, key, lock)
}

// Get returns the ID which is allocated to a key. Returns NoID if no ID
// has been allocated to this key yet.
func (a *Allocator) Get(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
	if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
		return id, nil
	}

	return a.GetNoCache(ctx, key)
}

// GetNoCache returns the ID which is allocated to a key in the kvstore,
// bypassing the local copy of allocated keys.
func (a *Allocator) GetNoCache(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
	return a.backend.Get(ctx, key)
}

// GetByID returns the key associated with an ID. Returns nil if no key is
// associated with the ID.
func (a *Allocator) GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
	if key := a.mainCache.getByID(id); key != nil {
		return key, nil
	}

	return a.backend.GetByID(ctx, id)
}

// GetIncludeRemoteCaches returns the ID which is allocated to a key. Includes
// the caches of watched remote kvstores in the query. Returns NoID if no ID
// has been allocated to this key yet in any kvstore.
func (a *Allocator) GetIncludeRemoteCaches(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
	encoded := a.encodeKey(key)

	// check main cache first
	if id := a.mainCache.get(encoded); id != idpool.NoID {
		return id, nil
	}

	// check remote caches
	a.remoteCachesMutex.RLock()
	for _, rc := range a.remoteCaches {
		if id := rc.cache.get(encoded); id != idpool.NoID {
			a.remoteCachesMutex.RUnlock()
			return id, nil
		}
	}
	a.remoteCachesMutex.RUnlock()

	// check main backend
	if id, err := a.backend.Get(ctx, key); id != idpool.NoID || err != nil {
		return id, err
	}

	// we skip checking remote backends explicitly here, to avoid
	// accidentally overloading them in case of lookups for invalid identities

	return idpool.NoID, nil
}

// GetByIDIncludeRemoteCaches returns the key associated with an ID. Includes
// the caches of watched remote kvstores in the query.
// Returns nil if no key is associated with the ID.
func (a *Allocator) GetByIDIncludeRemoteCaches(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
	// check main cache first
	if key := a.mainCache.getByID(id); key != nil {
		return key, nil
	}

	// check remote caches
	a.remoteCachesMutex.RLock()
	for _, rc := range a.remoteCaches {
		if key := rc.cache.getByID(id); key != nil {
			a.remoteCachesMutex.RUnlock()
			return key, nil
		}
	}
	a.remoteCachesMutex.RUnlock()

	// check main backend
	if key, err := a.backend.GetByID(ctx, id); key != nil || err != nil {
		return key, err
	}

	// we skip checking remote backends explicitly here, to avoid
	// accidentally overloading them in case of lookups for invalid identities

	return nil, nil
}
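
// The difference between the two lookup families above, as a sketch ('a',
// 'ctx' and 'key' are placeholders):
//
//	id, _ := a.Get(ctx, key)                   // main cache, then main backend
//	id, _ = a.GetIncludeRemoteCaches(ctx, key) // also consults watched remote caches,
//	                                           // but never remote backends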

// Release releases the use of an ID associated with the provided key. After
// the last user has released the ID, the key is removed from the KVstore and
// the returned lastUse value is true.
func (a *Allocator) Release(ctx context.Context, key AllocatorKey) (lastUse bool, err error) {
	log.WithField(fieldKey, key).Info("Releasing key")

	select {
	case <-a.initialListDone:
	case <-ctx.Done():
		return false, fmt.Errorf("release was cancelled while waiting for initial key list to be received: %w", ctx.Err())
	}

	k := a.encodeKey(key)

	a.slaveKeysMutex.Lock()
	defer a.slaveKeysMutex.Unlock()

	// release the key locally; if it was the last use, remove the node
	// specific value key to remove the global reference mark
	var id idpool.ID
	lastUse, id, err = a.localKeys.release(k)
	if err != nil {
		return lastUse, err
	}
	if lastUse {
		// Since in CRD mode we don't have a way to map which identity is being
		// used by a node, we need to also pass the ID to the release function.
		// This allows the CRD store to find the right identity by its ID and
		// remove the node reference on that identity.
		a.backend.Release(ctx, id, key)
	}

	return lastUse, err
}

// RunGC scans the kvstore for unused master keys and removes them
func (a *Allocator) RunGC(rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64) (map[string]uint64, *GCStats, error) {
	return a.backend.RunGC(context.TODO(), rateLimit, staleKeysPrevRound, a.min, a.max)
}

// RunLocksGC scans the kvstore for stale locks and removes them
func (a *Allocator) RunLocksGC(ctx context.Context, staleLocksPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error) {
	return a.backend.RunLocksGC(ctx, staleLocksPrevRound)
}

// DeleteAllKeys will delete all keys. It is expected to be used in tests.
func (a *Allocator) DeleteAllKeys() {
	a.backend.DeleteAllKeys(context.TODO())
}

// syncLocalKeys checks the kvstore and verifies that a master key exists for
// all locally used allocations. This will restore master keys if they have
// been deleted for some reason.
func (a *Allocator) syncLocalKeys() error {
	// Create a local copy of all local allocations so that we do not have to
	// hold any locks while performing kvstore operations. Local use can
	// disappear while we perform the sync, but that is fine: in the worst
	// case, a master key is created for a slave key that no longer exists.
	// The garbage collector will remove it again.
	ids := a.localKeys.getVerifiedIDs()

	for id, value := range ids {
		if err := a.backend.UpdateKey(context.TODO(), id, value, false); err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				fieldKey: value,
				fieldID:  id,
			}).Warning("Unable to sync key")
		}
	}

	return nil
}

func (a *Allocator) startLocalKeySync() {
	go func(a *Allocator) {
		kvTimer, kvTimerDone := inctimer.New()
		defer kvTimerDone()
		for {
			if err := a.syncLocalKeys(); err != nil {
				log.WithError(err).Warning("Unable to run local key sync routine")
			}

			select {
			case <-a.stopGC:
				log.Debug("Stopped master key sync routine")
				return
			case <-kvTimer.After(option.Config.KVstorePeriodicSync):
			}
		}
	}(a)
}

// AllocatorEventChan is a channel to receive allocator events on
type AllocatorEventChan chan AllocatorEvent

// Send- and receive-only versions of the above.
type AllocatorEventRecvChan = <-chan AllocatorEvent
type AllocatorEventSendChan = chan<- AllocatorEvent

// AllocatorEvent is an event sent over AllocatorEventChan
type AllocatorEvent struct {
	// Typ is the type of event (upsert / delete)
	Typ AllocatorChangeKind

	// ID is the allocated ID
	ID idpool.ID

	// Key is the key associated with the ID
	Key AllocatorKey
}
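
// A consumer sketch for AllocatorEvent, assuming the AllocatorChangeUpsert and
// AllocatorChangeDelete kinds defined alongside AllocatorChange elsewhere in
// this package; onUpsert and onDelete are placeholder handlers:
//
//	for ev := range events {
//		switch ev.Typ {
//		case AllocatorChangeUpsert:
//			onUpsert(ev.ID, ev.Key) // ev.ID is now (or still) associated with ev.Key
//		case AllocatorChangeDelete:
//			onDelete(ev.ID) // ev.ID has been released upstream
//		}
//	}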

// RemoteCache represents the cache content of an additional kvstore managing
// identities. The contents are not directly accessible but are merged into
// the results of the ForeachCache() function.
type RemoteCache struct {
	name string

	allocator *Allocator
	cache     *cache

	watchFunc func(ctx context.Context, remote *RemoteCache, onSync func(context.Context))
}

// NewRemoteCache creates a RemoteCache object for watching the kvstore of the
// given remote allocator.
func (a *Allocator) NewRemoteCache(remoteName string, remoteAlloc *Allocator) *RemoteCache {
	return &RemoteCache{
		name:      remoteName,
		allocator: remoteAlloc,
		cache:     &remoteAlloc.mainCache,

		watchFunc: a.WatchRemoteKVStore,
	}
}

// WatchRemoteKVStore starts watching the allocator base prefix of the kvstore
// represented by the provided remote cache's allocator. A local cache of all
// identities of that kvstore will be maintained in the RemoteCache structure
// and will start being reported by the ForeachCache() function. The remote
// cache's name should be unique per logical "remote".
func (a *Allocator) WatchRemoteKVStore(ctx context.Context, rc *RemoteCache, onSync func(context.Context)) {
	scopedLog := log.WithField(logfields.ClusterName, rc.name)
	scopedLog.Info("Starting remote kvstore watcher")

	rc.allocator.start()

	select {
	case <-ctx.Done():
		scopedLog.Debug("Context canceled before remote kvstore watcher synchronization completed: stale identities will now be drained")
		rc.close()

		a.remoteCachesMutex.RLock()
		old := a.remoteCaches[rc.name]
		a.remoteCachesMutex.RUnlock()

		if old != nil {
			old.cache.mutex.RLock()
			defer old.cache.mutex.RUnlock()
		}

		// Drain all entries that might have been received until now, and that
		// are not present in the current cache (if any). This ensures we do not
		// leak any stale identity, and at the same time we do not invalidate the
		// current state.
		rc.cache.drainIf(func(id idpool.ID) bool {
			if old == nil {
				return true
			}

			_, ok := old.cache.nextCache[id]
			return !ok
		})
		return

	case <-rc.cache.listDone:
		scopedLog.Info("Remote kvstore watcher successfully synchronized and registered")
	}

	a.remoteCachesMutex.Lock()
	old := a.remoteCaches[rc.name]
	a.remoteCaches[rc.name] = rc
	a.remoteCachesMutex.Unlock()

	if old != nil {
		// In case of reconnection, let's emit a deletion event for all stale identities
		// that are no longer present in the kvstore. We take the lock of the new cache
		// to ensure that we observe a stable state during this process (i.e., no keys
		// are added/removed in the meanwhile).
		scopedLog.Debug("Another kvstore watcher was already registered: deleting stale identities")
		rc.cache.mutex.RLock()
		old.cache.drainIf(func(id idpool.ID) bool {
			_, ok := rc.cache.nextCache[id]
			return !ok
		})
		rc.cache.mutex.RUnlock()
	}

	// Execute the on-sync callback handler.
	onSync(ctx)

	<-ctx.Done()
	rc.close()
	scopedLog.Info("Stopped remote kvstore watcher")
}
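
// Wiring sketch for a remote cluster. 'remoteAlloc' is an assumption: an
// Allocator bound to the remote cluster's kvstore backend, created with
// WithoutAutostart() (and typically WithoutGC()) since WatchRemoteKVStore
// starts it itself:
//
//	rc := a.NewRemoteCache("cluster-2", remoteAlloc)
//	go rc.Watch(ctx, func(ctx context.Context) {
//		// initial sync with "cluster-2" completed
//	})
//
//	// later, when disconnecting from the remote cluster:
//	a.RemoveRemoteKVStore("cluster-2")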

// RemoveRemoteKVStore removes any reference to a remote allocator / kvstore, emitting
// a deletion event for all previously known identities.
func (a *Allocator) RemoveRemoteKVStore(remoteName string) {
	a.remoteCachesMutex.Lock()
	old := a.remoteCaches[remoteName]
	delete(a.remoteCaches, remoteName)
	a.remoteCachesMutex.Unlock()

	if old != nil {
		old.cache.drain()
		log.WithField(logfields.ClusterName, remoteName).Info("Remote kvstore watcher unregistered")
	}
}

// Watch starts watching the remote kvstore and synchronizes the identities in
// the local cache. It blocks until the context is closed.
func (rc *RemoteCache) Watch(ctx context.Context, onSync func(context.Context)) {
	rc.watchFunc(ctx, rc, onSync)
}

// NumEntries returns the number of entries in the remote cache
func (rc *RemoteCache) NumEntries() int {
	if rc == nil {
		return 0
	}

	return rc.cache.numEntries()
}

// Synced returns whether the initial list of entries has been retrieved from
// the kvstore, and new events are currently being watched.
func (rc *RemoteCache) Synced() bool {
	if rc == nil {
		return false
	}

	select {
	case <-rc.cache.stopChan:
		return false
	default:
		select {
		case <-rc.cache.listDone:
			return true
		default:
			return false
		}
	}
}

// close stops watching for identities in the kvstore associated with the
// remote cache.
func (rc *RemoteCache) close() {
	rc.cache.allocator.Delete()
}

// Observe the identity changes. Conforms to stream.Observable.
// Replays the current state of the cache when subscribing.
func (a *Allocator) Observe(ctx context.Context, next func(AllocatorChange), complete func(error)) {
	a.mainCache.Observe(ctx, next, complete)
}
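
// Subscription sketch for Observe: 'next' first replays the current cache
// content and then receives live changes, while 'complete' runs when the
// stream terminates (e.g. on context cancellation); the callback bodies are
// placeholders:
//
//	a.Observe(ctx,
//		func(change AllocatorChange) {
//			// inspect the change (kind, ID, key)
//		},
//		func(err error) {
//			// subscription terminated
//		},
//	)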