github.com/cilium/cilium@v1.16.2/pkg/allocator/cache.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package allocator

import (
    "context"
    "sync"

    "github.com/cilium/stream"
    "github.com/sirupsen/logrus"

    "github.com/cilium/cilium/pkg/controller"
    "github.com/cilium/cilium/pkg/idpool"
    "github.com/cilium/cilium/pkg/lock"
    "github.com/cilium/cilium/pkg/logging/logfields"
    "github.com/cilium/cilium/pkg/time"
)

// backendOpTimeout is the time allowed for operations sent to backends in
// response to events such as create/modify/delete.
const backendOpTimeout = 10 * time.Second

// idMap provides mapping from ID to an AllocatorKey
type idMap map[idpool.ID]AllocatorKey

// keyMap provides mapping from AllocatorKey to ID
type keyMap map[string]idpool.ID

type cache struct {
    controllers *controller.Manager

    allocator *Allocator

    stopChan chan struct{}

    // mutex protects all cache data structures
    mutex lock.RWMutex

    // cache is a local cache of all IDs allocated in the kvstore. It is
    // maintained by watching for kvstore events and can thus lag behind.
    cache idMap

    // keyCache shadows cache and allows access by key
    keyCache keyMap

    // nextCache is a cache that is constantly being filled by startWatch().
    // Once startWatch() has successfully performed the initial fill using
    // ListPrefix, the cache above is pointed to nextCache. If startWatch()
    // fails to perform the initial list, the cache is never pointed to
    // nextCache. This guarantees that a valid cache is kept at all times.
    nextCache idMap

    // nextKeyCache follows the same logic as nextCache but for keyCache
    nextKeyCache keyMap

    listDone waitChan

    // stopWatchWg is a wait group that is incremented when a watcher is
    // started and marked done when that watcher has exited.
    stopWatchWg sync.WaitGroup

    changeSrc         stream.Observable[AllocatorChange]
    emitChange        func(AllocatorChange)
    completeChangeSrc func(error)
}

func newCache(a *Allocator) (c cache) {
    c = cache{
        allocator:   a,
        cache:       idMap{},
        keyCache:    keyMap{},
        stopChan:    make(chan struct{}),
        controllers: controller.NewManager(),
    }
    c.changeSrc, c.emitChange, c.completeChangeSrc = stream.Multicast[AllocatorChange]()
    return
}

type waitChan chan struct{}

// CacheMutations are the operations given to a Backend's ListAndWatch command.
// They are called on changes to identities.
type CacheMutations interface {
    // OnListDone is called when the initial full-sync is complete.
    OnListDone()

    // OnUpsert is called when either a new key->ID mapping appears or an existing
    // one is modified. The latter case may occur e.g. when leases are updated,
    // and does not mean that the actual mapping has changed.
    OnUpsert(id idpool.ID, key AllocatorKey)

    // OnDelete is called when a key->ID mapping is removed. This may trigger
    // master-key protection, if enabled, where the local allocator will recreate
    // the key->ID association because the local node is still using it.
    OnDelete(id idpool.ID, key AllocatorKey)
}
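
// Illustrative sketch, not part of the upstream file: a minimal CacheMutations
// implementation that only counts events, showing the callback contract a
// Backend's ListAndWatch drives — OnUpsert/OnDelete for individual changes and
// OnListDone once the initial listing is complete. The type name and fields
// below are hypothetical.
type countingMutations struct {
    upserts, deletes int
    listDone         bool
}

// Compile-time check that the sketch satisfies the interface above.
var _ CacheMutations = (*countingMutations)(nil)

func (m *countingMutations) OnListDone()                             { m.listDone = true }
func (m *countingMutations) OnUpsert(id idpool.ID, key AllocatorKey) { m.upserts++ }
func (m *countingMutations) OnDelete(id idpool.ID, key AllocatorKey) { m.deletes++ }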

func (c *cache) sendEvent(typ AllocatorChangeKind, id idpool.ID, key AllocatorKey) {
    if events := c.allocator.events; events != nil {
        events <- AllocatorEvent{Typ: typ, ID: id, Key: key}
    }
}

func (c *cache) OnListDone() {
    c.mutex.Lock()
    // nextCache is valid, point the live cache to it
    c.cache = c.nextCache
    c.keyCache = c.nextKeyCache
    c.mutex.Unlock()

    log.Debug("Initial list of identities received")

    // report that the list operation has been completed and the
    // allocator is ready to use
    close(c.listDone)
}

func (c *cache) OnUpsert(id idpool.ID, key AllocatorKey) {
    for _, validator := range c.allocator.cacheValidators {
        if err := validator(AllocatorChangeUpsert, id, key); err != nil {
            log.WithError(err).WithFields(logrus.Fields{
                logfields.Identity: id,
                logfields.Event:    AllocatorChangeUpsert,
            }).Warning("Skipping event for invalid identity")
            return
        }
    }

    c.mutex.Lock()
    defer c.mutex.Unlock()

    if k, ok := c.nextCache[id]; ok {
        delete(c.nextKeyCache, c.allocator.encodeKey(k))
    }

    c.nextCache[id] = key
    if key != nil {
        c.nextKeyCache[c.allocator.encodeKey(key)] = id
    }

    c.allocator.idPool.Remove(id)

    c.emitChange(AllocatorChange{Kind: AllocatorChangeUpsert, ID: id, Key: key})

    c.sendEvent(AllocatorChangeUpsert, id, key)
}

func (c *cache) OnDelete(id idpool.ID, key AllocatorKey) {
    for _, validator := range c.allocator.cacheValidators {
        if err := validator(AllocatorChangeDelete, id, key); err != nil {
            log.WithError(err).WithFields(logrus.Fields{
                logfields.Identity: id,
                logfields.Event:    AllocatorChangeDelete,
            }).Warning("Skipping event for invalid identity")
            return
        }
    }

    c.mutex.Lock()
    defer c.mutex.Unlock()

    c.onDeleteLocked(id, key, true)
}

const syncIdentityControllerGroup = "sync-identity"

func syncControllerName(id idpool.ID) string {
    return syncIdentityControllerGroup + "-" + id.String()
}

// no max interval by default, exposed as a variable for testing.
var masterKeyRecreateMaxInterval = time.Duration(0)

var syncIdentityGroup = controller.NewGroup(syncIdentityControllerGroup)

// onDeleteLocked must be called while holding c.mutex for writing
func (c *cache) onDeleteLocked(id idpool.ID, key AllocatorKey, recreateMissingLocalKeys bool) {
    a := c.allocator
    if a.enableMasterKeyProtection && recreateMissingLocalKeys {
        if value := a.localKeys.lookupID(id); value != nil {
            c.controllers.UpdateController(syncControllerName(id), controller.ControllerParams{
                Context:          context.Background(),
                MaxRetryInterval: masterKeyRecreateMaxInterval,
                Group:            syncIdentityGroup,
                DoFunc: func(ctx context.Context) error {
                    c.mutex.Lock()
                    defer c.mutex.Unlock()
                    // For each attempt, check if this ciliumidentity is still a candidate for recreation.
                    // It's possible that since the last iteration this agent has legitimately deleted
                    // the key, in which case we can stop trying to recreate it.
                    if value := c.allocator.localKeys.lookupID(id); value == nil {
                        return nil
                    }

                    ctx, cancel := context.WithTimeout(ctx, backendOpTimeout)
                    defer cancel()

                    // Each iteration will attempt to grab the key reference; if that succeeds
                    // then this completes (i.e. the key exists).
                    // Otherwise we will attempt to create the key; this process repeats until
                    // the key is created.
                    if err := a.backend.UpdateKey(ctx, id, value, true); err != nil {
                        log.WithField("id", id).WithError(err).Error("OnDelete MasterKeyProtection update for key")
                        return err
                    }
                    log.WithField("id", id).Info("OnDelete MasterKeyProtection update succeeded")
                    return nil
                },
            })

            return
        }
    }

    if k, ok := c.nextCache[id]; ok && k != nil {
        delete(c.nextKeyCache, c.allocator.encodeKey(k))
    }

    delete(c.nextCache, id)
    a.idPool.Insert(id)

    c.emitChange(AllocatorChange{Kind: AllocatorChangeDelete, ID: id, Key: key})

    c.sendEvent(AllocatorChangeDelete, id, key)
}
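
// Illustrative sketch, not part of the upstream file: a validator of the shape
// consumed by the loops in OnUpsert/OnDelete above — a function taking the
// change kind, ID and key and returning a non-nil error to have the event
// skipped. The function name, error type and range check are hypothetical;
// real validators are configured on the Allocator elsewhere in this package.
type sketchValidationError string

func (e sketchValidationError) Error() string { return string(e) }

func exampleRangeValidator(maxID idpool.ID) func(kind AllocatorChangeKind, id idpool.ID, key AllocatorKey) error {
    return func(kind AllocatorChangeKind, id idpool.ID, key AllocatorKey) error {
        // Reject events whose ID falls outside the expected range; OnUpsert
        // and OnDelete log a warning and drop the event when an error is
        // returned.
        if id > maxID {
            return sketchValidationError("identity ID outside expected range")
        }
        return nil
    }
}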

// start requests a LIST operation from the kvstore and starts watching the
// prefix in a goroutine.
func (c *cache) start() waitChan {
    c.listDone = make(waitChan)

    c.mutex.Lock()

    // start with a fresh nextCache
    c.nextCache = idMap{}
    c.nextKeyCache = keyMap{}
    c.mutex.Unlock()

    c.stopWatchWg.Add(1)

    go func() {
        c.allocator.backend.ListAndWatch(context.TODO(), c, c.stopChan)
        c.stopWatchWg.Done()
    }()

    return c.listDone
}

func (c *cache) stop() {
    close(c.stopChan)
    c.stopWatchWg.Wait()
    // Drain/stop any remaining sync identity controllers.
    // The backend watch is now stopped; any running controllers attempting to
    // sync identities will complete and stop (possibly in an unresolved state).
    c.controllers.RemoveAllAndWait()
    c.completeChangeSrc(nil)
}

// drain emits a deletion event for all known IDs. It must be called after the
// cache has been stopped, to ensure that no new events can be received afterwards.
func (c *cache) drain() {
    // Make sure we wait until the watch loop has been properly stopped.
    c.stopWatchWg.Wait()

    c.mutex.Lock()
    for id, key := range c.nextCache {
        c.onDeleteLocked(id, key, false)
    }
    c.mutex.Unlock()
}

// drainIf emits a deletion event for all known IDs that are stale according to
// the isStale function. It must be called after the cache has been stopped, to
// ensure that no new events can be received afterwards.
func (c *cache) drainIf(isStale func(id idpool.ID) bool) {
    // Make sure we wait until the watch loop has been properly stopped; otherwise
    // new IDs might be added after we complete the draining process.
    c.stopWatchWg.Wait()

    c.mutex.Lock()
    for id, key := range c.nextCache {
        if isStale(id) {
            c.onDeleteLocked(id, key, false)
            log.WithFields(logrus.Fields{fieldID: id, fieldKey: key}).
                Debug("Stale identity deleted")
        }
    }
    c.mutex.Unlock()
}
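
// Illustrative sketch, not part of the upstream file: the lifecycle driven by
// the rest of the allocator — start() returns the listDone channel, stop()
// tears the watch down, and drainIf() emits deletion events for IDs a
// caller-supplied predicate considers stale. The function name and the
// liveIDs set are hypothetical.
func exampleCacheLifecycle(c *cache, liveIDs map[idpool.ID]struct{}) {
    // Kick off the initial list + watch and wait until the first full listing
    // has been applied (OnListDone closes the returned channel).
    listDone := c.start()
    <-listDone

    // ... the cache now serves lookups via get()/getByID()/foreach() ...

    // Stop the watcher, then emit deletion events for every cached ID that is
    // no longer considered live.
    c.stop()
    c.drainIf(func(id idpool.ID) bool {
        _, live := liveIDs[id]
        return !live
    })
}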

func (c *cache) get(key string) idpool.ID {
    c.mutex.RLock()
    if id, ok := c.keyCache[key]; ok {
        c.mutex.RUnlock()
        return id
    }
    c.mutex.RUnlock()

    return idpool.NoID
}

func (c *cache) getByID(id idpool.ID) AllocatorKey {
    c.mutex.RLock()
    if v, ok := c.cache[id]; ok {
        c.mutex.RUnlock()
        return v
    }
    c.mutex.RUnlock()

    return nil
}

func (c *cache) foreach(cb RangeFunc) {
    c.mutex.RLock()
    for k, v := range c.cache {
        cb(k, v)
    }
    c.mutex.RUnlock()
}

func (c *cache) insert(key AllocatorKey, val idpool.ID) {
    c.mutex.Lock()
    c.nextCache[val] = key
    c.nextKeyCache[c.allocator.encodeKey(key)] = val
    c.mutex.Unlock()
}

func (c *cache) numEntries() int {
    c.mutex.RLock()
    defer c.mutex.RUnlock()
    return len(c.nextCache)
}

type AllocatorChangeKind string

const (
    AllocatorChangeSync   AllocatorChangeKind = "sync"
    AllocatorChangeUpsert AllocatorChangeKind = "upsert"
    AllocatorChangeDelete AllocatorChangeKind = "delete"
)

type AllocatorChange struct {
    Kind AllocatorChangeKind
    ID   idpool.ID
    Key  AllocatorKey
}

// Observe the allocator changes. Conforms to stream.Observable.
// Replays the current state of the cache when subscribing.
func (c *cache) Observe(ctx context.Context, next func(AllocatorChange), complete func(error)) {
    // This short-lived goroutine serves the purpose of replaying the current state of the cache before starting
    // to observe the actual source changeSrc. changeSrc is backed by a stream.FuncObservable that will start its
    // own goroutine. Therefore, the current goroutine will stop and free the lock on the mutex after the registration.
    go func() {
        // Wait until the initial listing has completed before
        // replaying the state.
        select {
        case <-c.listDone:
        case <-ctx.Done():
            complete(ctx.Err())
            return
        }

        c.mutex.RLock()
        defer c.mutex.RUnlock()

        for id, key := range c.cache {
            next(AllocatorChange{Kind: AllocatorChangeUpsert, ID: id, Key: key})
        }

        // Emit a sync event to inform the subscriber that it has received a consistent
        // initial state.
        next(AllocatorChange{Kind: AllocatorChangeSync})

        // And subscribe to new events. Since we held the read-lock there won't be any
        // missed or duplicate events.
        c.changeSrc.Observe(ctx, next, complete)
    }()
}
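
// Illustrative sketch, not part of the upstream file: consuming the change
// stream exposed by Observe above. On subscription the current cache content
// is replayed as upserts followed by a single sync event, after which live
// changes are delivered. The function name and the apply callback are
// hypothetical.
func exampleObserveChanges(ctx context.Context, c *cache, apply func(AllocatorChange)) {
    c.Observe(ctx,
        func(change AllocatorChange) {
            // Replayed upserts arrive first; the sync marker signals that the
            // subscriber now holds a consistent snapshot.
            apply(change)
        },
        func(err error) {
            // Stream completed: stop() completes the multicast with nil, and a
            // cancellation before listDone surfaces the context error here.
            _ = err
        },
    )
}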