github.com/cilium/cilium@v1.16.2/pkg/allocator/allocator.go

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package allocator
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  
    11  	"github.com/google/uuid"
    12  	"github.com/sirupsen/logrus"
    13  
    14  	"github.com/cilium/cilium/pkg/backoff"
    15  	"github.com/cilium/cilium/pkg/idpool"
    16  	"github.com/cilium/cilium/pkg/inctimer"
    17  	"github.com/cilium/cilium/pkg/kvstore"
    18  	"github.com/cilium/cilium/pkg/lock"
    19  	"github.com/cilium/cilium/pkg/logging"
    20  	"github.com/cilium/cilium/pkg/logging/logfields"
    21  	"github.com/cilium/cilium/pkg/option"
    22  	"github.com/cilium/cilium/pkg/rate"
    23  	"github.com/cilium/cilium/pkg/time"
    24  )
    25  
    26  var (
    27  	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "allocator")
    28  )
    29  
    30  const (
     31  	// maxAllocAttempts is the maximum number of allocation attempts
     32  	// performed before failing.
    33  	maxAllocAttempts = 16
    34  )
    35  
    36  // Allocator is a distributed ID allocator backed by a KVstore. It maps
    37  // arbitrary keys to identifiers. Multiple users on different cluster nodes can
    38  // in parallel request the ID for keys and are guaranteed to retrieve the same
    39  // ID for an identical key.
    40  //
     41  // While the details of how keys are stored are delegated to Backend
    42  // implementations, some expectations exist. See pkg/kvstore/allocator for
    43  // details about the kvstore implementation.
    44  //
    45  // A node takes a reference to an identity when it is in-use on that node, and
    46  // the identity remains in-use if there is any node reference to it. When an
    47  // identity no longer has any node references, it may be garbage collected. No
    48  // guarantees are made at that point and the numeric identity may be reused.
    49  // Note that the numeric IDs are selected locally and verified with the Backend.
    50  //
    51  // Lookup ID by key:
    52  //  1. Return ID from local cache updated by watcher (no Backend interactions)
    53  //  2. Do ListPrefix() on slave key excluding node suffix, return the first
    54  //     result that matches the exact prefix.
    55  //
    56  // Lookup key by ID:
    57  //  1. Return key from local cache updated by watcher (no Backend interactions)
    58  //  2. Do Get() on master key, return result
    59  //
     60  // Allocate:
     61  //  1. Check local key cache, increment, and return if the key is already in
     62  //     use locally (no Backend interactions)
     63  //  2. Look up the ID by key in the local cache updated by the watcher, or via
     64  //     the first slave key found in the Backend, if...
     65  //
     66  // ... match found:
     67  //
     68  //	2.1 Create a new slave key. This operation is potentially racy as the master
     69  //	    key can be removed in the meantime.
     70  //	    - etcd: Create is made conditional on existence of master key
     71  //	    - consul: locking
     72  //
     73  // ... match not found:
     74  //
     75  //	2.1 Select a new unused ID from the local cache
     76  //	2.2 Create a new master key with the condition that it may not exist
     77  //	2.3 Create a new slave key
    80  //
    81  // Release:
    82  //  1. Reduce local reference count until last use (no Backend interactions)
    83  //  2. Delete slave key (basePath/value/key1/node1)
    84  //     This automatically guarantees that when the last node has released the
    85  //     key, the key is no longer found by Get()
    86  //  3. If the node goes down, all slave keys of that node are removed after
    87  //     the TTL expires (auto release).
    88  type Allocator struct {
    89  	// events is a channel which will receive AllocatorEvent as IDs are
    90  	// added, modified or removed from the allocator
    91  	events AllocatorEventSendChan
    92  
    93  	// keyType is an instance of the type to be used as allocator key.
    94  	keyType AllocatorKey
    95  
    96  	// min is the lower limit when allocating IDs. The allocator will never
     97  	// allocate an ID less than this value.
    98  	min idpool.ID
    99  
   100  	// max is the upper limit when allocating IDs. The allocator will never
   101  	// allocate an ID greater than this value.
   102  	max idpool.ID
   103  
    104  	// prefixMask, if set, will be ORed with all selected IDs prior to
    105  	// allocation
   106  	prefixMask idpool.ID
   107  
   108  	// localKeys contains all keys including their reference count for keys
   109  	// which have been allocated and are in local use
   110  	localKeys *localKeys
   111  
   112  	// suffix is the suffix attached to keys which must be node specific,
    113  	// this is typically set to the node's IP address
   114  	suffix string
   115  
   116  	// backoffTemplate is the backoff configuration while allocating
   117  	backoffTemplate backoff.Exponential
   118  
   119  	// slaveKeysMutex protects the concurrent access of the slave key by this
   120  	// agent.
   121  	slaveKeysMutex lock.Mutex
   122  
   123  	// mainCache is the main cache, representing the allocator contents of
   124  	// the primary kvstore connection
   125  	mainCache cache
   126  
    127  	// remoteCachesMutex protects access to remoteCaches
   128  	remoteCachesMutex lock.RWMutex
   129  
   130  	// remoteCaches is the list of additional remote caches being watched
   131  	// in addition to the main cache
   132  	remoteCaches map[string]*RemoteCache
   133  
   134  	// stopGC is the channel used to stop the garbage collector
   135  	stopGC chan struct{}
   136  
   137  	// initialListDone is a channel that is closed when the initial
   138  	// synchronization has completed
   139  	initialListDone waitChan
   140  
   141  	// idPool maintains a pool of available ids for allocation.
   142  	idPool *idpool.IDPool
   143  
   144  	// enableMasterKeyProtection if true, causes master keys that are still in
   145  	// local use to be automatically re-created
   146  	enableMasterKeyProtection bool
   147  
   148  	// disableGC disables the garbage collector
   149  	disableGC bool
   150  
   151  	// disableAutostart prevents starting the allocator when it is initialized
   152  	disableAutostart bool
   153  
   154  	// cacheValidators implement extra validations of retrieved identities, e.g.,
   155  	// to ensure that they belong to the expected range.
   156  	cacheValidators []CacheValidator
   157  
    158  	// backend is the upstream, shared, backend to which we synchronize local
   159  	// information
   160  	backend Backend
   161  }
   162  
   163  // AllocatorOption is the base type for allocator options
   164  type AllocatorOption func(*Allocator)
   165  
   166  // CacheValidator is the type of the validation functions triggered to filter out
   167  // invalid notification events.
   168  type CacheValidator func(kind AllocatorChangeKind, id idpool.ID, key AllocatorKey) error
   169  
   170  // NewAllocatorForGC returns an allocator that can be used to run RunGC()
   171  //
   172  // The allocator can be configured by passing in additional options:
   173  //   - WithMin(id) - minimum ID to allocate (default: 1)
   174  //   - WithMax(id) - maximum ID to allocate (default max(uint64))
   175  func NewAllocatorForGC(backend Backend, opts ...AllocatorOption) *Allocator {
   176  	a := &Allocator{
   177  		backend: backend,
   178  		min:     idpool.ID(1),
   179  		max:     idpool.ID(^uint64(0)),
   180  	}
   181  
   182  	for _, fn := range opts {
   183  		fn(a)
   184  	}
   185  
   186  	return a
   187  }
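         // A minimal sketch of a GC consumer built on NewAllocatorForGC(); the backend,
         // limiter, gcInterval, ctx and ID range used here are illustrative placeholders,
         // not values mandated by this package:
         //
         //	a := NewAllocatorForGC(backend, WithMin(idpool.ID(256)), WithMax(idpool.ID(65535)))
         //
         //	staleKeys := map[string]uint64{}
         //	for {
         //		var stats *GCStats
         //		var err error
         //		staleKeys, stats, err = a.RunGC(limiter, staleKeys)
         //		if err != nil {
         //			log.WithError(err).Warning("Identity GC round failed")
         //		} else {
         //			log.Debugf("Identity GC: %d alive, %d deleted", stats.Alive, stats.Deleted)
         //		}
         //
         //		select {
         //		case <-ctx.Done():
         //			return
         //		case <-time.After(gcInterval):
         //		}
         //	}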
   188  
   189  type GCStats struct {
   190  	// Alive is the number of identities alive
   191  	Alive int
   192  
   193  	// Deleted is the number of identities deleted
   194  	Deleted int
   195  }
   196  
   197  // Backend represents clients to remote ID allocation systems, such as KV
   198  // Stores. These are used to coordinate key->ID allocation between cilium
   199  // nodes.
   200  type Backend interface {
   201  	// DeleteAllKeys will delete all keys. It is used in tests.
   202  	DeleteAllKeys(ctx context.Context)
   203  
   204  	// Encode encodes a key string as required to conform to the key
   205  	// restrictions of the backend
   206  	Encode(string) string
   207  
   208  	// AllocateID creates a new key->ID association. This is expected to be a
   209  	// create-only operation, and the ID may be allocated by another node. An
   210  	// error in that case is not expected to be fatal. The actual ID is obtained
   211  	// by Allocator from the local idPool, which is updated with used-IDs as the
   212  	// Backend makes calls to the handler in ListAndWatch.
   213  	// The implementation of the backend might return an AllocatorKey that is
   214  	// a copy of 'key' with an internal reference of the backend key or, if it
   215  	// doesn't use the internal reference of the backend key it simply returns
   216  	// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
   217  	AllocateID(ctx context.Context, id idpool.ID, key AllocatorKey) (AllocatorKey, error)
   218  
   219  	// AllocateIDIfLocked behaves like AllocateID but when lock is non-nil the
   220  	// operation proceeds only if it is still valid.
   221  	// The implementation of the backend might return an AllocatorKey that is
   222  	// a copy of 'key' with an internal reference of the backend key or, if it
   223  	// doesn't use the internal reference of the backend key it simply returns
   224  	// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
   225  	AllocateIDIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) (AllocatorKey, error)
   226  
   227  	// AcquireReference records that this node is using this key->ID mapping.
   228  	// This is distinct from any reference counting within this agent; only one
   229  	// reference exists for this node for any number of managed endpoints using
   230  	// it.
   231  	// The semantics of cleaning up stale references is delegated to the Backend
   232  	// implementation. RunGC may need to be invoked.
   233  	// This can race, and so lock can be provided (via a Lock call, below).
   234  	AcquireReference(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) error
   235  
    236  	// Release releases the use of an ID associated with the provided key. It
    237  	// does not guard against concurrent releases.
   239  	Release(ctx context.Context, id idpool.ID, key AllocatorKey) (err error)
   240  
   241  	// UpdateKey refreshes the record that this node is using this key -> id
   242  	// mapping. When reliablyMissing is set it will also recreate missing master or
   243  	// slave keys.
   244  	UpdateKey(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool) error
   245  
   246  	// UpdateKeyIfLocked behaves like UpdateKey but when lock is non-nil the operation proceeds only if it is still valid.
   247  	UpdateKeyIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool, lock kvstore.KVLocker) error
   248  
   249  	// Get returns the allocated ID for this key as seen by the Backend. This may
   250  	// have been created by other agents.
   251  	Get(ctx context.Context, key AllocatorKey) (idpool.ID, error)
   252  
    253  	// GetIfLocked behaves like Get, but when lock is non-nil the
   254  	// operation proceeds only if it is still valid.
   255  	GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error)
   256  
   257  	// GetByID returns the key associated with this ID, as seen by the Backend.
   258  	// This may have been created by other agents.
   259  	GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error)
   260  
   261  	// Lock provides an opaque lock object that can be used, later, to ensure
   262  	// that the key has not changed since the lock was created. This can be done
   263  	// with GetIfLocked.
   264  	Lock(ctx context.Context, key AllocatorKey) (kvstore.KVLocker, error)
   265  
   266  	// ListAndWatch begins synchronizing the local Backend instance with its
   267  	// remote.
   268  	ListAndWatch(ctx context.Context, handler CacheMutations, stopChan chan struct{})
   269  
   270  	// RunGC reaps stale or unused identities within the Backend and makes them
   271  	// available for reuse. It is used by the cilium-operator and is not invoked
   272  	// by cilium-agent.
    273  	// Note: not all Backend implementations rely on this, such as the kvstore
   274  	// backends, and may use leases to expire keys.
   275  	RunGC(ctx context.Context, rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64, minID idpool.ID, maxID idpool.ID) (map[string]uint64, *GCStats, error)
   276  
   277  	// RunLocksGC reaps stale or unused locks within the Backend. It is used by
   278  	// the cilium-operator and is not invoked by cilium-agent. Returns
   279  	// a map of locks currently being held in the KVStore including the ones
   280  	// that failed to be GCed.
   281  	// Note: not all Backend implementations rely on this, such as the kvstore
   282  	// backends, and may use leases to expire keys.
   283  	RunLocksGC(ctx context.Context, staleKeysPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error)
   284  
   285  	// Status returns a human-readable status of the Backend.
   286  	Status() (string, error)
   287  }
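         // Concrete backends typically assert conformance at compile time; a small
         // sketch, where myBackend stands in for an implementation defined elsewhere:
         //
         //	var _ Backend = (*myBackend)(nil)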
   288  
   289  // NewAllocator creates a new Allocator. Any type can be used as key as long as
   290  // the type implements the AllocatorKey interface. A variable of the type has
    291  // to be passed into NewAllocator() to make the type known. The base path used
    292  // to prefix all keys in the kvstore is determined by the provided Backend and
    293  // must be unique.
   294  //
   295  // The allocator can be configured by passing in additional options:
   296  //   - WithEvents() - enable Events channel
   297  //   - WithMin(id) - minimum ID to allocate (default: 1)
   298  //   - WithMax(id) - maximum ID to allocate (default max(uint64))
   299  //
   300  // After creation, IDs can be allocated with Allocate() and released with
   301  // Release()
   302  func NewAllocator(typ AllocatorKey, backend Backend, opts ...AllocatorOption) (*Allocator, error) {
   303  	a := &Allocator{
   304  		keyType:      typ,
   305  		backend:      backend,
   306  		min:          idpool.ID(1),
   307  		max:          idpool.ID(^uint64(0)),
   308  		localKeys:    newLocalKeys(),
   309  		stopGC:       make(chan struct{}),
   310  		suffix:       uuid.New().String()[:10],
   311  		remoteCaches: map[string]*RemoteCache{},
   312  		backoffTemplate: backoff.Exponential{
   313  			Min:    time.Duration(20) * time.Millisecond,
   314  			Factor: 2.0,
   315  		},
   316  	}
   317  
   318  	for _, fn := range opts {
   319  		fn(a)
   320  	}
   321  
   322  	a.mainCache = newCache(a)
   323  
   324  	if a.suffix == "<nil>" {
   325  		return nil, errors.New("allocator suffix is <nil> and unlikely unique")
   326  	}
   327  
   328  	if a.min < 1 {
   329  		return nil, errors.New("minimum ID must be >= 1")
   330  	}
   331  
   332  	if a.max <= a.min {
   333  		return nil, fmt.Errorf("maximum ID must be greater than minimum ID: configured max %v, min %v", a.max, a.min)
   334  	}
   335  
   336  	a.idPool = idpool.NewIDPool(a.min, a.max)
   337  
   338  	if !a.disableAutostart {
   339  		a.start()
   340  	}
   341  
   342  	return a, nil
   343  }
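         // A minimal usage sketch. keyType stands in for a value of a concrete
         // AllocatorKey implementation and backend for an initialized Backend; both are
         // placeholders rather than names provided by this package:
         //
         //	a, err := NewAllocator(keyType, backend,
         //		WithMin(idpool.ID(256)), WithMax(idpool.ID(65535)))
         //	if err != nil {
         //		return err
         //	}
         //	defer a.Delete()
         //
         //	if err := a.WaitForInitialSync(ctx); err != nil {
         //		return err
         //	}
         //
         //	id, isNew, firstUse, err := a.Allocate(ctx, keyType)
         //	if err != nil {
         //		return err
         //	}
         //	_, _ = isNew, firstUse // isNew: created in the kvstore; firstUse: first local reference
         //
         //	// ... use id ...
         //
         //	lastUse, err := a.Release(ctx, keyType)
         //	if err != nil {
         //		return err
         //	}
         //	_ = lastUse // true once the last local reference to the key was dropped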
   344  
   345  func (a *Allocator) start() {
   346  	a.initialListDone = a.mainCache.start()
   347  	if !a.disableGC {
   348  		go func() {
   349  			select {
   350  			case <-a.initialListDone:
   351  			case <-time.After(option.Config.AllocatorListTimeout):
   352  				log.Fatalf("Timeout while waiting for initial allocator state")
   353  			}
   354  			a.startLocalKeySync()
   355  		}()
   356  	}
   357  }
   358  
   359  // WithBackend sets this allocator to use backend. It is expected to be used at
   360  // initialization.
   361  func WithBackend(backend Backend) AllocatorOption {
   362  	return func(a *Allocator) {
   363  		a.backend = backend
   364  	}
   365  }
   366  
   367  // WithEvents enables receiving of events.
   368  //
    369  // CAUTION: When using this function, the provided channel must be continuously
    370  // read while NewAllocator() is being called to ensure that the channel does
    371  // not block indefinitely while NewAllocator() emits events on it during
    372  // initial cache population.
   373  func WithEvents(events AllocatorEventSendChan) AllocatorOption {
   374  	return func(a *Allocator) { a.events = events }
   375  }
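         // A sketch of wiring up an events channel; handleEvent, keyType and backend are
         // placeholders for consumer-side code, and the buffer size is arbitrary. The
         // important part is that the goroutine drains the channel before NewAllocator()
         // is invoked:
         //
         //	events := make(AllocatorEventChan, 1024)
         //	go func() {
         //		for ev := range events {
         //			// ev.Typ, ev.ID and ev.Key describe the upsert or delete.
         //			handleEvent(ev)
         //		}
         //	}()
         //
         //	a, err := NewAllocator(keyType, backend, WithEvents(events))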
   376  
   377  // WithMin sets the minimum identifier to be allocated
   378  func WithMin(id idpool.ID) AllocatorOption {
   379  	return func(a *Allocator) { a.min = id }
   380  }
   381  
   382  // WithMax sets the maximum identifier to be allocated
   383  func WithMax(id idpool.ID) AllocatorOption {
   384  	return func(a *Allocator) { a.max = id }
   385  }
   386  
   387  // WithPrefixMask sets the prefix used for all ID allocations. If set, the mask
    388  // will be ORed with all selected IDs prior to allocation. It is the
   389  // responsibility of the caller to ensure that the mask is not conflicting with
   390  // min..max.
   391  func WithPrefixMask(mask idpool.ID) AllocatorOption {
   392  	return func(a *Allocator) { a.prefixMask = mask }
   393  }
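         // For example (the values below are purely illustrative), combining WithMin(1),
         // WithMax(0xFFFF) and WithPrefixMask(idpool.ID(3 << 16)) causes locally selected
         // IDs 0x0001..0xFFFF to be exposed as 0x30001..0x3FFFF, since the mask is ORed
         // with every selected ID in selectAvailableID().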
   394  
   395  // WithMasterKeyProtection will watch for delete events on master keys and
    396  // re-create them if local usage suggests that the key is still in use
   397  func WithMasterKeyProtection() AllocatorOption {
   398  	return func(a *Allocator) { a.enableMasterKeyProtection = true }
   399  }
   400  
   401  // WithoutGC disables the use of the garbage collector
   402  func WithoutGC() AllocatorOption {
   403  	return func(a *Allocator) { a.disableGC = true }
   404  }
   405  
   406  // WithoutAutostart prevents starting the allocator when it is initialized
   407  func WithoutAutostart() AllocatorOption {
   408  	return func(a *Allocator) { a.disableAutostart = true }
   409  }
   410  
   411  // WithCacheValidator registers a validator triggered for each identity
   412  // notification event to filter out invalid IDs and keys.
   413  func WithCacheValidator(validator CacheValidator) AllocatorOption {
   414  	return func(a *Allocator) { a.cacheValidators = append(a.cacheValidators, validator) }
   415  }
   416  
   417  // GetEvents returns the events channel given to the allocator when
   418  // constructed.
   419  // Note: This channel is not owned by the allocator!
   420  func (a *Allocator) GetEvents() AllocatorEventSendChan {
   421  	return a.events
   422  }
   423  
   424  // Delete deletes an allocator and stops the garbage collector
   425  func (a *Allocator) Delete() {
   426  	close(a.stopGC)
   427  	a.mainCache.stop()
   428  }
   429  
   430  // WaitForInitialSync waits until the initial sync is complete
   431  func (a *Allocator) WaitForInitialSync(ctx context.Context) error {
   432  	select {
   433  	case <-a.initialListDone:
   434  	case <-ctx.Done():
   435  		return fmt.Errorf("identity sync was cancelled: %w", ctx.Err())
   436  	}
   437  
   438  	return nil
   439  }
   440  
    441  // RangeFunc is the function called by ForeachCache for each cached entry
   442  type RangeFunc func(idpool.ID, AllocatorKey)
   443  
   444  // ForeachCache iterates over the allocator cache and calls RangeFunc on each
   445  // cached entry
   446  func (a *Allocator) ForeachCache(cb RangeFunc) {
   447  	a.mainCache.foreach(cb)
   448  
   449  	a.remoteCachesMutex.RLock()
   450  	for _, rc := range a.remoteCaches {
   451  		rc.cache.foreach(cb)
   452  	}
   453  	a.remoteCachesMutex.RUnlock()
   454  }
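         // For instance, counting all cached entries (main and remote caches alike) only
         // needs a small RangeFunc closure:
         //
         //	entries := 0
         //	a.ForeachCache(func(id idpool.ID, key AllocatorKey) {
         //		entries++
         //	})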
   455  
   456  // selectAvailableID selects an available ID.
   457  // Returns a triple of the selected ID ORed with prefixMask, the ID string and
   458  // the originally selected ID.
   459  func (a *Allocator) selectAvailableID() (idpool.ID, string, idpool.ID) {
   460  	if id := a.idPool.LeaseAvailableID(); id != idpool.NoID {
   461  		unmaskedID := id
   462  		id |= a.prefixMask
   463  		return id, id.String(), unmaskedID
   464  	}
   465  
   466  	return 0, "", 0
   467  }
   468  
   469  // AllocatorKey is the interface to implement in order for a type to be used as
   470  // key for the allocator. The key's data is assumed to be a collection of
    471  // pkg/labels.Label, and the functions reflect this somewhat.
   472  type AllocatorKey interface {
   473  	fmt.Stringer
   474  
   475  	// GetKey returns the canonical string representation of the key
   476  	GetKey() string
   477  
   478  	// PutKey stores the information in v into the key. This is the inverse
   479  	// operation to GetKey
   480  	PutKey(v string) AllocatorKey
   481  
   482  	// GetAsMap returns the key as a collection of "labels" with a key and value.
   483  	// This is the inverse operation to PutKeyFromMap.
   484  	GetAsMap() map[string]string
   485  
   486  	// PutKeyFromMap stores the labels in v into the key to be used later. This
   487  	// is the inverse operation to GetAsMap.
   488  	PutKeyFromMap(v map[string]string) AllocatorKey
   489  
   490  	// PutValue puts metadata inside the global identity for the given 'key' with
   491  	// the given 'value'.
   492  	PutValue(key any, value any) AllocatorKey
   493  
   494  	// Value returns the value stored in the metadata map.
   495  	Value(key any) any
   496  }
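         // A minimal sketch of an AllocatorKey implementation backed by a plain label
         // map. The type below is illustrative only, assumes the standard sort and
         // strings packages, and is not the key type used by the agent:
         //
         //	type mapKey struct {
         //		labels map[string]string
         //		meta   map[any]any
         //	}
         //
         //	func (m *mapKey) GetKey() string {
         //		kvs := make([]string, 0, len(m.labels))
         //		for k, v := range m.labels {
         //			kvs = append(kvs, k+"="+v)
         //		}
         //		sort.Strings(kvs)
         //		return strings.Join(kvs, ";")
         //	}
         //
         //	func (m *mapKey) String() string              { return m.GetKey() }
         //	func (m *mapKey) GetAsMap() map[string]string { return m.labels }
         //
         //	func (m *mapKey) PutKey(v string) AllocatorKey {
         //		labels := map[string]string{}
         //		for _, kv := range strings.Split(v, ";") {
         //			if k, val, ok := strings.Cut(kv, "="); ok {
         //				labels[k] = val
         //			}
         //		}
         //		return &mapKey{labels: labels, meta: map[any]any{}}
         //	}
         //
         //	func (m *mapKey) PutKeyFromMap(v map[string]string) AllocatorKey {
         //		return &mapKey{labels: v, meta: map[any]any{}}
         //	}
         //
         //	func (m *mapKey) PutValue(key any, value any) AllocatorKey {
         //		m.meta[key] = value
         //		return m
         //	}
         //
         //	func (m *mapKey) Value(key any) any { return m.meta[key] }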
   497  
   498  func (a *Allocator) encodeKey(key AllocatorKey) string {
   499  	return a.backend.Encode(key.GetKey())
   500  }
   501  
   502  // Return values:
   503  //  1. allocated ID
   504  //  2. whether the ID is newly allocated from kvstore
   505  //  3. whether this is the first owner that holds a reference to the key in
   506  //     localkeys store
   507  //  4. error in case of failure
   508  func (a *Allocator) lockedAllocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
   509  	var firstUse bool
   510  
   511  	kvstore.Trace("Allocating key in kvstore", nil, logrus.Fields{fieldKey: key})
   512  
   513  	k := a.encodeKey(key)
   514  	lock, err := a.backend.Lock(ctx, key)
   515  	if err != nil {
   516  		return 0, false, false, err
   517  	}
   518  
   519  	defer lock.Unlock(context.Background())
   520  
   521  	// fetch first key that matches /value/<key> while ignoring the
   522  	// node suffix
   523  	value, err := a.GetIfLocked(ctx, key, lock)
   524  	if err != nil {
   525  		return 0, false, false, err
   526  	}
   527  
   528  	kvstore.Trace("kvstore state is: ", nil, logrus.Fields{fieldID: value})
   529  
   530  	a.slaveKeysMutex.Lock()
   531  	defer a.slaveKeysMutex.Unlock()
   532  
    533  	// We shouldn't assume that, because the master key does not exist in the
    534  	// kvstore, localKeys does not have it either. The kvstore might have lost all
    535  	// of its data while the local agent still holds a reference for the master key.
   536  	if value == 0 {
   537  		value = a.localKeys.lookupKey(k)
   538  		if value != 0 {
   539  			// re-create master key
   540  			if err := a.backend.UpdateKeyIfLocked(ctx, value, key, true, lock); err != nil {
   541  				return 0, false, false, fmt.Errorf("unable to re-create missing master key '%s': %s while allocating ID: %w", key, value, err)
   542  			}
   543  		}
   544  	} else {
   545  		_, firstUse, err = a.localKeys.allocate(k, key, value)
   546  		if err != nil {
   547  			return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %w", k, err)
   548  		}
   549  
   550  		if firstUse {
   551  			log.WithField(fieldKey, k).Debug("Reserved new local key")
   552  		} else {
   553  			log.WithField(fieldKey, k).Debug("Reusing existing local key")
   554  		}
   555  	}
   556  
   557  	if value != 0 {
   558  		log.WithField(fieldKey, k).Info("Reusing existing global key")
   559  
   560  		if err = a.backend.AcquireReference(ctx, value, key, lock); err != nil {
   561  			a.localKeys.release(k)
   562  			return 0, false, false, fmt.Errorf("unable to create secondary key '%s': %w", k, err)
   563  		}
   564  
   565  		// mark the key as verified in the local cache
   566  		if err := a.localKeys.verify(k); err != nil {
   567  			log.WithError(err).Error("BUG: Unable to verify local key")
   568  		}
   569  
   570  		return value, false, firstUse, nil
   571  	}
   572  
   573  	log.WithField(fieldKey, k).Debug("Allocating new master ID")
   574  	id, strID, unmaskedID := a.selectAvailableID()
   575  	if id == 0 {
   576  		return 0, false, false, fmt.Errorf("no more available IDs in configured space")
   577  	}
   578  
   579  	kvstore.Trace("Selected available key ID", nil, logrus.Fields{fieldID: id})
   580  
   581  	releaseKeyAndID := func() {
   582  		a.localKeys.release(k)
   583  		a.idPool.Release(unmaskedID) // This returns this ID to be re-used for other keys
   584  	}
   585  
   586  	oldID, firstUse, err := a.localKeys.allocate(k, key, id)
   587  	if err != nil {
   588  		a.idPool.Release(unmaskedID)
   589  		return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %w", k, err)
   590  	}
   591  
   592  	// Another local writer beat us to allocating an ID for the same key,
   593  	// start over
   594  	if id != oldID {
   595  		releaseKeyAndID()
   596  		return 0, false, false, fmt.Errorf("another writer has allocated key %s", k)
   597  	}
   598  
   599  	// Check that this key has not been allocated in the cluster during our
   600  	// operation here
   601  	value, err = a.GetNoCache(ctx, key)
   602  	if err != nil {
   603  		releaseKeyAndID()
   604  		return 0, false, false, err
   605  	}
   606  	if value != 0 {
   607  		releaseKeyAndID()
   608  		return 0, false, false, fmt.Errorf("Found master key after proceeding with new allocation for %s", k)
   609  	}
   610  
    611  	// Copy 'key' into 'key2' so that, if the call below fails and overwrites
    612  	// 'key' with nil, the original key is still available for the error message.
   613  	key2 := key
   614  	key, err = a.backend.AllocateIDIfLocked(ctx, id, key2, lock)
   615  	if err != nil {
    616  		// Creation failed. Another agent most likely beat us to allocating this
   617  		// ID, retry.
   618  		releaseKeyAndID()
   619  		return 0, false, false, fmt.Errorf("unable to allocate ID %s for key %s: %w", strID, key2, err)
   620  	}
   621  
   622  	// Notify pool that leased ID is now in-use.
   623  	a.idPool.Use(unmaskedID)
   624  
   625  	if err = a.backend.AcquireReference(ctx, id, key, lock); err != nil {
   626  		// We will leak the master key here as the key has already been
   627  		// exposed and may be in use by other nodes. The garbage
   628  		// collector will release it again.
   629  		releaseKeyAndID()
   630  		return 0, false, false, fmt.Errorf("secondary key creation failed '%s': %w", k, err)
   631  	}
   632  
   633  	// mark the key as verified in the local cache
   634  	if err := a.localKeys.verify(k); err != nil {
   635  		log.WithError(err).Error("BUG: Unable to verify local key")
   636  	}
   637  
   638  	log.WithField(fieldKey, k).Info("Allocated new global key")
   639  
   640  	return id, true, firstUse, nil
   641  }
   642  
   643  // Allocate will retrieve the ID for the provided key. If no ID has been
   644  // allocated for this key yet, a key will be allocated. If allocation fails,
   645  // most likely due to a parallel allocation of the same ID by another user,
   646  // allocation is re-attempted for maxAllocAttempts times.
   647  //
   648  // Return values:
   649  //  1. allocated ID
   650  //  2. whether the ID is newly allocated from kvstore
   651  //  3. whether this is the first owner that holds a reference to the key in
   652  //     localkeys store
   653  //  4. error in case of failure
   654  func (a *Allocator) Allocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
   655  	var (
   656  		err      error
   657  		value    idpool.ID
   658  		isNew    bool
   659  		firstUse bool
   660  		k        = a.encodeKey(key)
   661  	)
   662  
   663  	log.WithField(fieldKey, key).Debug("Allocating key")
   664  
   665  	select {
   666  	case <-a.initialListDone:
   667  	case <-ctx.Done():
   668  		return 0, false, false, fmt.Errorf("allocation was cancelled while waiting for initial key list to be received: %w", ctx.Err())
   669  	}
   670  
   671  	kvstore.Trace("Allocating from kvstore", nil, logrus.Fields{fieldKey: key})
   672  
   673  	// make a copy of the template and customize it
   674  	boff := a.backoffTemplate
   675  	boff.Name = key.String()
   676  
   677  	for attempt := 0; attempt < maxAllocAttempts; attempt++ {
   678  		// Check our list of local keys already in use and increment the
   679  		// refcnt. The returned key must be released afterwards. No kvstore
   680  		// operation was performed for this allocation.
   681  		// We also do this on every loop as a different Allocate call might have
   682  		// allocated the key while we are attempting to allocate in this
   683  		// execution thread. It does not hurt to check if localKeys contains a
   684  		// reference for the key that we are attempting to allocate.
   685  		if val := a.localKeys.use(k); val != idpool.NoID {
   686  			kvstore.Trace("Reusing local id", nil, logrus.Fields{fieldID: val, fieldKey: key})
   687  			a.mainCache.insert(key, val)
   688  			return val, false, false, nil
   689  		}
   690  
   691  		// FIXME: Add non-locking variant
   692  		value, isNew, firstUse, err = a.lockedAllocate(ctx, key)
   693  		if err == nil {
   694  			a.mainCache.insert(key, value)
   695  			log.WithField(fieldKey, key).WithField(fieldID, value).Debug("Allocated key")
   696  			return value, isNew, firstUse, nil
   697  		}
   698  
   699  		scopedLog := log.WithFields(logrus.Fields{
   700  			fieldKey:          key,
   701  			logfields.Attempt: attempt,
   702  		})
   703  
   704  		select {
   705  		case <-ctx.Done():
   706  			scopedLog.WithError(ctx.Err()).Warning("Ongoing key allocation has been cancelled")
   707  			return 0, false, false, fmt.Errorf("key allocation cancelled: %w", ctx.Err())
   708  		default:
   709  			scopedLog.WithError(err).Warning("Key allocation attempt failed")
   710  		}
   711  
   712  		kvstore.Trace("Allocation attempt failed", err, logrus.Fields{fieldKey: key, logfields.Attempt: attempt})
   713  
   714  		if waitErr := boff.Wait(ctx); waitErr != nil {
   715  			return 0, false, false, waitErr
   716  		}
   717  	}
   718  
   719  	return 0, false, false, err
   720  }
   721  
    722  // GetIfLocked returns the ID which is allocated to a key. Returns an ID of
    723  // NoID if no ID has been allocated to this key yet, provided that the client
    724  // is still holding the given lock.
   725  func (a *Allocator) GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error) {
   726  	if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
   727  		return id, nil
   728  	}
   729  
   730  	return a.backend.GetIfLocked(ctx, key, lock)
   731  }
   732  
   733  // Get returns the ID which is allocated to a key. Returns an ID of NoID if no ID
   734  // has been allocated to this key yet.
   735  func (a *Allocator) Get(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
   736  	if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
   737  		return id, nil
   738  	}
   739  
   740  	return a.GetNoCache(ctx, key)
   741  }
   742  
   743  // GetNoCache returns the ID which is allocated to a key in the kvstore,
   744  // bypassing the local copy of allocated keys.
   745  func (a *Allocator) GetNoCache(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
   746  	return a.backend.Get(ctx, key)
   747  }
   748  
   749  // GetByID returns the key associated with an ID. Returns nil if no key is
   750  // associated with the ID.
   751  func (a *Allocator) GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
   752  	if key := a.mainCache.getByID(id); key != nil {
   753  		return key, nil
   754  	}
   755  
   756  	return a.backend.GetByID(ctx, id)
   757  }
   758  
   759  // GetIncludeRemoteCaches returns the ID which is allocated to a key. Includes the
   760  // caches of watched remote kvstores in the query. Returns an ID of NoID if no
   761  // ID has been allocated in any remote kvstore to this key yet.
   762  func (a *Allocator) GetIncludeRemoteCaches(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
   763  	encoded := a.encodeKey(key)
   764  
   765  	// check main cache first
   766  	if id := a.mainCache.get(encoded); id != idpool.NoID {
   767  		return id, nil
   768  	}
   769  
   770  	// check remote caches
   771  	a.remoteCachesMutex.RLock()
   772  	for _, rc := range a.remoteCaches {
   773  		if id := rc.cache.get(encoded); id != idpool.NoID {
   774  			a.remoteCachesMutex.RUnlock()
   775  			return id, nil
   776  		}
   777  	}
   778  	a.remoteCachesMutex.RUnlock()
   779  
   780  	// check main backend
   781  	if id, err := a.backend.Get(ctx, key); id != idpool.NoID || err != nil {
   782  		return id, err
   783  	}
   784  
   785  	// we skip checking remote backends explicitly here, to avoid
   786  	// accidentally overloading them in case of lookups for invalid identities
   787  
   788  	return idpool.NoID, nil
   789  }
   790  
   791  // GetByIDIncludeRemoteCaches returns the key associated with an ID. Includes
   792  // the caches of watched remote kvstores in the query.
   793  // Returns nil if no key is associated with the ID.
   794  func (a *Allocator) GetByIDIncludeRemoteCaches(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
   795  	// check main cache first
   796  	if key := a.mainCache.getByID(id); key != nil {
   797  		return key, nil
   798  	}
   799  
   800  	// check remote caches
   801  	a.remoteCachesMutex.RLock()
   802  	for _, rc := range a.remoteCaches {
   803  		if key := rc.cache.getByID(id); key != nil {
   804  			a.remoteCachesMutex.RUnlock()
   805  			return key, nil
   806  		}
   807  	}
   808  	a.remoteCachesMutex.RUnlock()
   809  
   810  	// check main backend
   811  	if key, err := a.backend.GetByID(ctx, id); key != nil || err != nil {
   812  		return key, err
   813  	}
   814  
   815  	// we skip checking remote backends explicitly here, to avoid
   816  	// accidentally overloading them in case of lookups for invalid identities
   817  
   818  	return nil, nil
   819  }
   820  
   821  // Release releases the use of an ID associated with the provided key. After
    822  // the last user has released the ID, the key is removed from the KVstore and
   823  // the returned lastUse value is true.
   824  func (a *Allocator) Release(ctx context.Context, key AllocatorKey) (lastUse bool, err error) {
   825  	log.WithField(fieldKey, key).Info("Releasing key")
   826  
   827  	select {
   828  	case <-a.initialListDone:
   829  	case <-ctx.Done():
   830  		return false, fmt.Errorf("release was cancelled while waiting for initial key list to be received: %w", ctx.Err())
   831  	}
   832  
   833  	k := a.encodeKey(key)
   834  
   835  	a.slaveKeysMutex.Lock()
   836  	defer a.slaveKeysMutex.Unlock()
   837  
   838  	// release the key locally, if it was the last use, remove the node
   839  	// specific value key to remove the global reference mark
   840  	var id idpool.ID
   841  	lastUse, id, err = a.localKeys.release(k)
   842  	if err != nil {
   843  		return lastUse, err
   844  	}
   845  	if lastUse {
   846  		// Since in CRD mode we don't have a way to map which identity is being
   847  		// used by a node, we need to also pass the ID to the release function.
   848  		// This allows the CRD store to find the right identity by its ID and
   849  		// remove the node reference on that identity.
   850  		a.backend.Release(ctx, id, key)
   851  	}
   852  
   853  	return lastUse, err
   854  }
   855  
   856  // RunGC scans the kvstore for unused master keys and removes them
   857  func (a *Allocator) RunGC(rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64) (map[string]uint64, *GCStats, error) {
   858  	return a.backend.RunGC(context.TODO(), rateLimit, staleKeysPrevRound, a.min, a.max)
   859  }
   860  
   861  // RunLocksGC scans the kvstore for stale locks and removes them
   862  func (a *Allocator) RunLocksGC(ctx context.Context, staleLocksPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error) {
   863  	return a.backend.RunLocksGC(ctx, staleLocksPrevRound)
   864  }
   865  
   866  // DeleteAllKeys will delete all keys. It is expected to be used in tests.
   867  func (a *Allocator) DeleteAllKeys() {
   868  	a.backend.DeleteAllKeys(context.TODO())
   869  }
   870  
   871  // syncLocalKeys checks the kvstore and verifies that a master key exists for
   872  // all locally used allocations. This will restore master keys if deleted for
   873  // some reason.
   874  func (a *Allocator) syncLocalKeys() error {
    875  	// Create a local copy of all local allocations so that we do not need to
    876  	// hold any locks while performing kvstore operations. Local use can
    877  	// disappear while we perform the sync, but that is fine: in the worst case,
    878  	// a master key is created for a slave key that no longer exists. The
    879  	// garbage collector will remove it again.
   880  	ids := a.localKeys.getVerifiedIDs()
   881  
   882  	for id, value := range ids {
   883  		if err := a.backend.UpdateKey(context.TODO(), id, value, false); err != nil {
   884  			log.WithError(err).WithFields(logrus.Fields{
   885  				fieldKey: value,
   886  				fieldID:  id,
   887  			}).Warning("Unable to sync key")
   888  		}
   889  	}
   890  
   891  	return nil
   892  }
   893  
   894  func (a *Allocator) startLocalKeySync() {
   895  	go func(a *Allocator) {
   896  		kvTimer, kvTimerDone := inctimer.New()
   897  		defer kvTimerDone()
   898  		for {
   899  			if err := a.syncLocalKeys(); err != nil {
   900  				log.WithError(err).Warning("Unable to run local key sync routine")
   901  			}
   902  
   903  			select {
   904  			case <-a.stopGC:
   905  				log.Debug("Stopped master key sync routine")
   906  				return
   907  			case <-kvTimer.After(option.Config.KVstorePeriodicSync):
   908  			}
   909  		}
   910  	}(a)
   911  }
   912  
   913  // AllocatorEventChan is a channel to receive allocator events on
   914  type AllocatorEventChan chan AllocatorEvent
   915  
   916  // Send- and receive-only versions of the above.
   917  type AllocatorEventRecvChan = <-chan AllocatorEvent
   918  type AllocatorEventSendChan = chan<- AllocatorEvent
   919  
   920  // AllocatorEvent is an event sent over AllocatorEventChan
   921  type AllocatorEvent struct {
   922  	// Typ is the type of event (upsert / delete)
   923  	Typ AllocatorChangeKind
   924  
   925  	// ID is the allocated ID
   926  	ID idpool.ID
   927  
   928  	// Key is the key associated with the ID
   929  	Key AllocatorKey
   930  }
   931  
   932  // RemoteCache represents the cache content of an additional kvstore managing
    933  // identities. The contents are not directly accessible but are merged into the
    934  // results of the ForeachCache() function.
   935  type RemoteCache struct {
   936  	name string
   937  
   938  	allocator *Allocator
   939  	cache     *cache
   940  
   941  	watchFunc func(ctx context.Context, remote *RemoteCache, onSync func(context.Context))
   942  }
   943  
   944  func (a *Allocator) NewRemoteCache(remoteName string, remoteAlloc *Allocator) *RemoteCache {
   945  	return &RemoteCache{
   946  		name:      remoteName,
   947  		allocator: remoteAlloc,
   948  		cache:     &remoteAlloc.mainCache,
   949  
   950  		watchFunc: a.WatchRemoteKVStore,
   951  	}
   952  }
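         // A usage sketch: remoteAlloc is assumed to be a second *Allocator configured
         // for the remote kvstore (for example with WithoutGC() and WithoutAutostart(),
         // since WatchRemoteKVStore starts it); the cluster name and the callback body
         // are illustrative:
         //
         //	rc := a.NewRemoteCache("cluster-2", remoteAlloc)
         //	go rc.Watch(ctx, func(ctx context.Context) {
         //		// invoked once the initial listing from the remote kvstore completed
         //	})
         //
         //	// Once the remote cluster is disconnected:
         //	a.RemoveRemoteKVStore("cluster-2")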
   953  
    954  // WatchRemoteKVStore starts watching the allocator base prefix in the kvstore
    955  // represented by the provided RemoteCache. A local cache of all identities of
    956  // that kvstore is maintained in the RemoteCache structure and those identities
    957  // start being reported by the ForeachCache() function. The remote cache name
    958  // should be unique per logical "remote".
   959  func (a *Allocator) WatchRemoteKVStore(ctx context.Context, rc *RemoteCache, onSync func(context.Context)) {
   960  	scopedLog := log.WithField(logfields.ClusterName, rc.name)
   961  	scopedLog.Info("Starting remote kvstore watcher")
   962  
   963  	rc.allocator.start()
   964  
   965  	select {
   966  	case <-ctx.Done():
   967  		scopedLog.Debug("Context canceled before remote kvstore watcher synchronization completed: stale identities will now be drained")
   968  		rc.close()
   969  
   970  		a.remoteCachesMutex.RLock()
   971  		old := a.remoteCaches[rc.name]
   972  		a.remoteCachesMutex.RUnlock()
   973  
   974  		if old != nil {
   975  			old.cache.mutex.RLock()
   976  			defer old.cache.mutex.RUnlock()
   977  		}
   978  
   979  		// Drain all entries that might have been received until now, and that
   980  		// are not present in the current cache (if any). This ensures we do not
   981  		// leak any stale identity, and at the same time we do not invalidate the
   982  		// current state.
   983  		rc.cache.drainIf(func(id idpool.ID) bool {
   984  			if old == nil {
   985  				return true
   986  			}
   987  
   988  			_, ok := old.cache.nextCache[id]
   989  			return !ok
   990  		})
   991  		return
   992  
   993  	case <-rc.cache.listDone:
   994  		scopedLog.Info("Remote kvstore watcher successfully synchronized and registered")
   995  	}
   996  
   997  	a.remoteCachesMutex.Lock()
   998  	old := a.remoteCaches[rc.name]
   999  	a.remoteCaches[rc.name] = rc
  1000  	a.remoteCachesMutex.Unlock()
  1001  
  1002  	if old != nil {
  1003  		// In case of reconnection, let's emit a deletion event for all stale identities
  1004  		// that are no longer present in the kvstore. We take the lock of the new cache
  1005  		// to ensure that we observe a stable state during this process (i.e., no keys
  1006  		// are added/removed in the meanwhile).
  1007  		scopedLog.Debug("Another kvstore watcher was already registered: deleting stale identities")
  1008  		rc.cache.mutex.RLock()
  1009  		old.cache.drainIf(func(id idpool.ID) bool {
  1010  			_, ok := rc.cache.nextCache[id]
  1011  			return !ok
  1012  		})
  1013  		rc.cache.mutex.RUnlock()
  1014  	}
  1015  
  1016  	// Execute the on-sync callback handler.
  1017  	onSync(ctx)
  1018  
  1019  	<-ctx.Done()
  1020  	rc.close()
  1021  	scopedLog.Info("Stopped remote kvstore watcher")
  1022  }
  1023  
  1024  // RemoveRemoteKVStore removes any reference to a remote allocator / kvstore, emitting
  1025  // a deletion event for all previously known identities.
  1026  func (a *Allocator) RemoveRemoteKVStore(remoteName string) {
  1027  	a.remoteCachesMutex.Lock()
  1028  	old := a.remoteCaches[remoteName]
  1029  	delete(a.remoteCaches, remoteName)
  1030  	a.remoteCachesMutex.Unlock()
  1031  
  1032  	if old != nil {
  1033  		old.cache.drain()
  1034  		log.WithField(logfields.ClusterName, remoteName).Info("Remote kvstore watcher unregistered")
  1035  	}
  1036  }
  1037  
   1038  // Watch starts watching the remote kvstore and synchronizes the identities in
  1039  // the local cache. It blocks until the context is closed.
  1040  func (rc *RemoteCache) Watch(ctx context.Context, onSync func(context.Context)) {
  1041  	rc.watchFunc(ctx, rc, onSync)
  1042  }
  1043  
  1044  // NumEntries returns the number of entries in the remote cache
  1045  func (rc *RemoteCache) NumEntries() int {
  1046  	if rc == nil {
  1047  		return 0
  1048  	}
  1049  
  1050  	return rc.cache.numEntries()
  1051  }
  1052  
  1053  // Synced returns whether the initial list of entries has been retrieved from
  1054  // the kvstore, and new events are currently being watched.
  1055  func (rc *RemoteCache) Synced() bool {
  1056  	if rc == nil {
  1057  		return false
  1058  	}
  1059  
  1060  	select {
  1061  	case <-rc.cache.stopChan:
  1062  		return false
  1063  	default:
  1064  		select {
  1065  		case <-rc.cache.listDone:
  1066  			return true
  1067  		default:
  1068  			return false
  1069  		}
  1070  	}
  1071  }
  1072  
  1073  // close stops watching for identities in the kvstore associated with the
  1074  // remote cache.
  1075  func (rc *RemoteCache) close() {
  1076  	rc.cache.allocator.Delete()
  1077  }
  1078  
  1079  // Observe the identity changes. Conforms to stream.Observable.
  1080  // Replays the current state of the cache when subscribing.
  1081  func (a *Allocator) Observe(ctx context.Context, next func(AllocatorChange), complete func(error)) {
  1082  	a.mainCache.Observe(ctx, next, complete)
  1083  }
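         // A small subscription sketch; handleChange is a placeholder for consumer-side
         // handling of each replayed or new change:
         //
         //	a.Observe(ctx,
         //		func(change AllocatorChange) {
         //			handleChange(change)
         //		},
         //		func(err error) {
         //			if err != nil {
         //				log.WithError(err).Debug("Allocator observation terminated")
         //			}
         //		},
         //	)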