github.com/cilium/cilium@v1.16.2/pkg/identity/cache/allocator.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package cache
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  
    14  	"github.com/cilium/stream"
    15  	"github.com/google/renameio/v2"
    16  	jsoniter "github.com/json-iterator/go"
    17  	"github.com/sirupsen/logrus"
    18  
    19  	"github.com/cilium/cilium/pkg/allocator"
    20  	"github.com/cilium/cilium/pkg/controller"
    21  	"github.com/cilium/cilium/pkg/identity"
    22  	"github.com/cilium/cilium/pkg/identity/key"
    23  	"github.com/cilium/cilium/pkg/idpool"
    24  	api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io"
    25  	clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
    26  	"github.com/cilium/cilium/pkg/k8s/identitybackend"
    27  	"github.com/cilium/cilium/pkg/kvstore"
    28  	kvstoreallocator "github.com/cilium/cilium/pkg/kvstore/allocator"
    29  	"github.com/cilium/cilium/pkg/labels"
    30  	"github.com/cilium/cilium/pkg/lock"
    31  	"github.com/cilium/cilium/pkg/logging/logfields"
    32  	"github.com/cilium/cilium/pkg/metrics"
    33  	"github.com/cilium/cilium/pkg/option"
    34  	"github.com/cilium/cilium/pkg/time"
    35  	"github.com/cilium/cilium/pkg/trigger"
    36  )
    37  
    38  var (
    39  	// IdentitiesPath is the path to where identities are stored in the
    40  	// key-value store.
    41  	IdentitiesPath = path.Join(kvstore.BaseKeyPrefix, "state", "identities", "v1")
    42  )
    43  
// CheckpointFile is the filename for the local allocator checkpoint. This is
// periodically written, and restored on restart.
// The full path is, by default, /run/cilium/state/local_allocator_state.json
const CheckpointFile = "local_allocator_state.json"
    48  
// CachingIdentityAllocator manages the allocation of identities for both
// global and local identities.
type CachingIdentityAllocator struct {
	// IdentityAllocator is the allocator for global security identities. It
	// is created asynchronously by InitIdentityAllocator() on top of either
	// the kvstore or the CRD backend and reset to nil by Close().
	IdentityAllocator *allocator.Allocator

	// globalIdentityAllocatorInitialized is closed whenever the global identity
	// allocator is initialized. Close() replaces it with a fresh channel.
	globalIdentityAllocatorInitialized chan struct{}

	// localIdentities is the cache for identities in
	// identity.IdentityScopeLocal.
	localIdentities *localIdentityCache

	// localNodeIdentities is the cache for identities in
	// identity.IdentityScopeRemoteNode.
	localNodeIdentities *localIdentityCache

	// identitiesPath is the prefix passed to the kvstore allocator backend.
	identitiesPath string

	// events exists to hand out references that are either for sending or
	// for receiving. It should not be used directly without converting it
	// first to an AllocatorEventSendChan or AllocatorEventRecvChan.
	events  allocator.AllocatorEventChan
	watcher identityWatcher

	// setupMutex synchronizes InitIdentityAllocator() and Close()
	setupMutex lock.Mutex

	// owner is notified through UpdateIdentities() when identities are added
	// or deleted. Release() and RestoreLocalIdentities() tolerate a nil owner.
	owner IdentityAllocatorOwner

	// checkpointTrigger schedules checkpoints of the local allocator state.
	// It is nil unless EnableCheckpointing() has been called.
	checkpointTrigger *trigger.Trigger
	// triggerDone is closed by the trigger's shutdown function once the
	// checkpoint controller has been removed.
	triggerDone <-chan struct{}

	// restoredIdentities are the set of identities read in from a
	// checkpoint on startup. These should be released, see
	// `RestoreLocalIdentities()` and `ReleaseRestoredIdentities()` for more
	// info.
	restoredIdentities map[identity.NumericIdentity]*identity.Identity

	// checkpointPath is the file where local allocator state should be checkpointed.
	// The default is /run/cilium/state/local_allocator_state.json, changed only for testing.
	// An empty path disables checkpointing and restoration.
	checkpointPath string
}
    89  
// IdentityAllocatorOwner is the interface the owner of an identity allocator
// must implement.
type IdentityAllocatorOwner interface {
	// UpdateIdentities will be called when identities have changed
	//
	// The caller is responsible for making sure the same identity
	// is not present in both 'added' and 'deleted', so that they
	// can be processed in either order.
	UpdateIdentities(added, deleted identity.IdentityMap)

	// GetNodeSuffix must return the node specific suffix to use
	GetNodeSuffix() string
}
   103  
// IdentityAllocator is any type which is responsible for allocating security
// identities based on sets of labels, and caching information about identities
// locally.
type IdentityAllocator interface {
	// Identity changes are observable.
	stream.Observable[IdentityChange]

	// WaitForInitialGlobalIdentities waits for the initial set of global
	// security identities to have been received.
	WaitForInitialGlobalIdentities(context.Context) error

	// AllocateIdentity allocates an identity described by the specified labels.
	// A possible previously used numeric identity for these labels can be passed
	// in as the last parameter; identity.InvalidIdentity must be passed if no
	// previous numeric identity exists.
	AllocateIdentity(context.Context, labels.Labels, bool, identity.NumericIdentity) (*identity.Identity, bool, error)

	// Release is the reverse operation of AllocateIdentity() and releases the
	// specified identity.
	Release(context.Context, *identity.Identity, bool) (released bool, err error)

	// LookupIdentity returns the identity that corresponds to the given
	// labels.
	LookupIdentity(ctx context.Context, lbls labels.Labels) *identity.Identity

	// LookupIdentityByID returns the identity that corresponds to the given
	// numeric identity.
	LookupIdentityByID(ctx context.Context, id identity.NumericIdentity) *identity.Identity

	// GetIdentityCache returns the current cache of identities that the
	// allocator has allocated. The caller should not modify the resulting
	// identities by pointer.
	GetIdentityCache() identity.IdentityMap

	// GetIdentities returns a copy of the current cache of identities.
	GetIdentities() IdentitiesModel

	// WithholdLocalIdentities holds a set of numeric identities out of the local
	// allocation pool(s). Once withheld, a numeric identity can only be used
	// when explicitly requested via AllocateIdentity(..., oldNID).
	WithholdLocalIdentities(nids []identity.NumericIdentity)

	// UnwithholdLocalIdentities removes numeric identities from the withheld set,
	// freeing them for general allocation.
	UnwithholdLocalIdentities(nids []identity.NumericIdentity)
}
   150  
   151  // InitIdentityAllocator creates the global identity allocator. Only the first
   152  // invocation of this function will have an effect. The Caller must have
   153  // initialized well known identities before calling this (by calling
   154  // identity.InitWellKnownIdentities()).
   155  // The client is only used by the CRD identity allocator currently.
   156  // Returns a channel which is closed when initialization of the allocator is
   157  // completed.
   158  // TODO: identity backends are initialized directly in this function, pulling
   159  // in dependencies on kvstore and k8s. It would be better to decouple this,
   160  // since the backends are an interface.
   161  func (m *CachingIdentityAllocator) InitIdentityAllocator(client clientset.Interface) <-chan struct{} {
   162  	m.setupMutex.Lock()
   163  	defer m.setupMutex.Unlock()
   164  
   165  	if m.IdentityAllocator != nil {
   166  		log.Panic("InitIdentityAllocator() in succession without calling Close()")
   167  	}
   168  
   169  	log.Info("Initializing identity allocator")
   170  
   171  	minID := idpool.ID(identity.GetMinimalAllocationIdentity(option.Config.ClusterID))
   172  	maxID := idpool.ID(identity.GetMaximumAllocationIdentity(option.Config.ClusterID))
   173  
   174  	log.WithFields(map[string]interface{}{
   175  		"min":        minID,
   176  		"max":        maxID,
   177  		"cluster-id": option.Config.ClusterID,
   178  	}).Info("Allocating identities between range")
   179  
   180  	// In the case of the allocator being closed, we need to create a new events channel
   181  	// and start a new watch.
   182  	if m.events == nil {
   183  		m.events = make(allocator.AllocatorEventChan, eventsQueueSize)
   184  		m.watcher.watch(m.events)
   185  	}
   186  
   187  	// Asynchronously set up the global identity allocator since it connects
   188  	// to the kvstore.
   189  	go func(owner IdentityAllocatorOwner, events allocator.AllocatorEventSendChan, minID, maxID idpool.ID) {
   190  		m.setupMutex.Lock()
   191  		defer m.setupMutex.Unlock()
   192  
   193  		var (
   194  			backend allocator.Backend
   195  			err     error
   196  		)
   197  
   198  		switch option.Config.IdentityAllocationMode {
   199  		case option.IdentityAllocationModeKVstore:
   200  			log.Debug("Identity allocation backed by KVStore")
   201  			backend, err = kvstoreallocator.NewKVStoreBackend(m.identitiesPath, owner.GetNodeSuffix(), &key.GlobalIdentity{}, kvstore.Client())
   202  			if err != nil {
   203  				log.WithError(err).Fatal("Unable to initialize kvstore backend for identity allocation")
   204  			}
   205  
   206  		case option.IdentityAllocationModeCRD:
   207  			log.Debug("Identity allocation backed by CRD")
   208  			backend, err = identitybackend.NewCRDBackend(identitybackend.CRDBackendConfiguration{
   209  				Store:   nil,
   210  				Client:  client,
   211  				KeyFunc: (&key.GlobalIdentity{}).PutKeyFromMap,
   212  			})
   213  			if err != nil {
   214  				log.WithError(err).Fatal("Unable to initialize Kubernetes CRD backend for identity allocation")
   215  			}
   216  
   217  		default:
   218  			log.Fatalf("Unsupported identity allocation mode %s", option.Config.IdentityAllocationMode)
   219  		}
   220  
   221  		a, err := allocator.NewAllocator(&key.GlobalIdentity{}, backend,
   222  			allocator.WithMax(maxID), allocator.WithMin(minID),
   223  			allocator.WithEvents(events),
   224  			allocator.WithMasterKeyProtection(),
   225  			allocator.WithPrefixMask(idpool.ID(option.Config.ClusterID<<identity.GetClusterIDShift())))
   226  		if err != nil {
   227  			log.WithError(err).Fatalf("Unable to initialize Identity Allocator with backend %s", option.Config.IdentityAllocationMode)
   228  		}
   229  
   230  		m.IdentityAllocator = a
   231  		close(m.globalIdentityAllocatorInitialized)
   232  	}(m.owner, m.events, minID, maxID)
   233  
   234  	return m.globalIdentityAllocatorInitialized
   235  }
   236  
   237  // EnableCheckpointing enables checkpointing the local allocator state.
   238  // The CachingIdentityAllocator is used in multiple places, but we only want to
   239  // checkpoint the "primary" allocator
   240  func (m *CachingIdentityAllocator) EnableCheckpointing() {
   241  	controllerManager := controller.NewManager()
   242  	controllerGroup := controller.NewGroup("identity-allocator")
   243  	controllerName := "local-identity-checkpoint"
   244  	triggerDone := make(chan struct{})
   245  	t, _ := trigger.NewTrigger(trigger.Parameters{
   246  		MinInterval: 10 * time.Second,
   247  		TriggerFunc: func(reasons []string) {
   248  			controllerManager.UpdateController(controllerName, controller.ControllerParams{
   249  				Group:    controllerGroup,
   250  				DoFunc:   m.checkpoint,
   251  				StopFunc: m.checkpoint, // perform one last checkpoint when the controller is removed
   252  			})
   253  		},
   254  		ShutdownFunc: func() {
   255  			controllerManager.RemoveControllerAndWait(controllerName) // waits for StopFunc
   256  			close(triggerDone)
   257  		},
   258  	})
   259  
   260  	m.checkpointTrigger = t
   261  	m.triggerDone = triggerDone
   262  }
   263  
// eventsQueueSize is the buffer capacity used when creating the allocator
// events channel.
const eventsQueueSize = 1024
   265  
   266  // InitIdentityAllocator creates the identity allocator. Only the first
   267  // invocation of this function will have an effect. The Caller must have
   268  // initialized well known identities before calling this (by calling
   269  // identity.InitWellKnownIdentities()).
   270  // client and identityStore are only used by the CRD identity allocator,
   271  // currently, and identityStore may be nil.
   272  // Returns a channel which is closed when initialization of the allocator is
   273  // completed.
   274  // TODO: identity backends are initialized directly in this function, pulling
   275  // in dependencies on kvstore and k8s. It would be better to decouple this,
   276  // since the backends are an interface.
   277  
   278  // NewCachingIdentityAllocator creates a new instance of an
   279  // CachingIdentityAllocator.
   280  func NewCachingIdentityAllocator(owner IdentityAllocatorOwner) *CachingIdentityAllocator {
   281  	watcher := identityWatcher{
   282  		owner: owner,
   283  	}
   284  
   285  	m := &CachingIdentityAllocator{
   286  		globalIdentityAllocatorInitialized: make(chan struct{}),
   287  		owner:                              owner,
   288  		identitiesPath:                     IdentitiesPath,
   289  		watcher:                            watcher,
   290  		events:                             make(allocator.AllocatorEventChan, eventsQueueSize),
   291  	}
   292  	if option.Config.RunDir != "" { // disable checkpointing if this is a unit test
   293  		m.checkpointPath = filepath.Join(option.Config.StateDir, CheckpointFile)
   294  	}
   295  	m.watcher.watch(m.events)
   296  
   297  	// Local identity cache can be created synchronously since it doesn't
   298  	// rely upon any external resources (e.g., external kvstore).
   299  	m.localIdentities = newLocalIdentityCache(identity.IdentityScopeLocal, identity.MinAllocatorLocalIdentity, identity.MaxAllocatorLocalIdentity, m.events)
   300  	m.localNodeIdentities = newLocalIdentityCache(identity.IdentityScopeRemoteNode, identity.MinAllocatorLocalIdentity, identity.MaxAllocatorLocalIdentity, m.events)
   301  
   302  	return m
   303  }
   304  
   305  // Close closes the identity allocator
   306  func (m *CachingIdentityAllocator) Close() {
   307  	m.setupMutex.Lock()
   308  	defer m.setupMutex.Unlock()
   309  
   310  	if m.checkpointTrigger != nil {
   311  		m.checkpointTrigger.Shutdown()
   312  		<-m.triggerDone
   313  		m.checkpointTrigger = nil
   314  	}
   315  
   316  	select {
   317  	case <-m.globalIdentityAllocatorInitialized:
   318  		// This means the channel was closed and therefore the IdentityAllocator == nil will never be true
   319  	default:
   320  		if m.IdentityAllocator == nil {
   321  			log.Error("Close() called without calling InitIdentityAllocator() first")
   322  			return
   323  		}
   324  	}
   325  
   326  	m.IdentityAllocator.Delete()
   327  	if m.events != nil {
   328  		m.localIdentities.close()
   329  		m.localNodeIdentities.close()
   330  		close(m.events)
   331  		m.events = nil
   332  	}
   333  
   334  	m.IdentityAllocator = nil
   335  	m.globalIdentityAllocatorInitialized = make(chan struct{})
   336  }
   337  
   338  // WaitForInitialGlobalIdentities waits for the initial set of global security
   339  // identities to have been received and populated into the allocator cache.
   340  func (m *CachingIdentityAllocator) WaitForInitialGlobalIdentities(ctx context.Context) error {
   341  	select {
   342  	case <-m.globalIdentityAllocatorInitialized:
   343  	case <-ctx.Done():
   344  		return fmt.Errorf("initial global identity sync was cancelled: %w", ctx.Err())
   345  	}
   346  
   347  	return m.IdentityAllocator.WaitForInitialSync(ctx)
   348  }
   349  
   350  var ErrNonLocalIdentity = fmt.Errorf("labels would result in global identity")
   351  
   352  // AllocateLocalIdentity works the same as AllocateIdentity, but it guarantees that the allocated
   353  // identity will be local-only. If the provided set of labels does not map to a local identity scope,
   354  // this will return an error.
   355  func (m *CachingIdentityAllocator) AllocateLocalIdentity(lbls labels.Labels, notifyOwner bool, oldNID identity.NumericIdentity) (id *identity.Identity, allocated bool, err error) {
   356  
   357  	// If this is a reserved, pre-allocated identity, just return that and be done
   358  	if reservedIdentity := identity.LookupReservedIdentityByLabels(lbls); reservedIdentity != nil {
   359  		if option.Config.Debug {
   360  			log.WithFields(logrus.Fields{
   361  				logfields.Identity:       reservedIdentity.ID,
   362  				logfields.IdentityLabels: lbls.String(),
   363  				"isNew":                  false,
   364  			}).Debug("Resolving reserved identity")
   365  		}
   366  		return reservedIdentity, false, nil
   367  	}
   368  
   369  	if option.Config.Debug {
   370  		log.WithFields(logrus.Fields{
   371  			logfields.IdentityLabels: lbls.String(),
   372  		}).Debug("Resolving local identity")
   373  	}
   374  
   375  	// Allocate according to scope
   376  	var metricLabel string
   377  	switch scope := identity.ScopeForLabels(lbls); scope {
   378  	case identity.IdentityScopeLocal:
   379  		id, allocated, err = m.localIdentities.lookupOrCreate(lbls, oldNID, notifyOwner)
   380  		metricLabel = identity.NodeLocalIdentityType
   381  	case identity.IdentityScopeRemoteNode:
   382  		id, allocated, err = m.localNodeIdentities.lookupOrCreate(lbls, oldNID, notifyOwner)
   383  		metricLabel = identity.RemoteNodeIdentityType
   384  	default:
   385  		log.WithFields(logrus.Fields{
   386  			logfields.Labels: lbls,
   387  			"scope":          scope,
   388  		}).Error("BUG: attempt to allocate local identity for labels, but a global identity is required")
   389  		return nil, false, ErrNonLocalIdentity
   390  	}
   391  	if err != nil {
   392  		return nil, false, err
   393  	}
   394  
   395  	if allocated {
   396  		metrics.Identity.WithLabelValues(metricLabel).Inc()
   397  		for labelSource := range lbls.CollectSources() {
   398  			metrics.IdentityLabelSources.WithLabelValues(labelSource).Inc()
   399  		}
   400  
   401  		if m.checkpointTrigger != nil {
   402  			m.checkpointTrigger.Trigger()
   403  		}
   404  
   405  		if notifyOwner {
   406  			added := identity.IdentityMap{
   407  				id.ID: id.LabelArray,
   408  			}
   409  			m.owner.UpdateIdentities(added, nil)
   410  		}
   411  	}
   412  
   413  	return
   414  }
   415  
   416  // needsGlobalIdentity returns true if these labels require
   417  // allocating a global identity
   418  func needsGlobalIdentity(lbls labels.Labels) bool {
   419  	// If lbls corresponds to a reserved identity, no global allocation required
   420  	if identity.LookupReservedIdentityByLabels(lbls) != nil {
   421  		return false
   422  	}
   423  
   424  	// determine identity scope from labels,
   425  	return identity.ScopeForLabels(lbls) == identity.IdentityScopeGlobal
   426  }
   427  
   428  // AllocateIdentity allocates an identity described by the specified labels. If
   429  // an identity for the specified set of labels already exist, the identity is
   430  // re-used and reference counting is performed, otherwise a new identity is
   431  // allocated via the kvstore or via the local identity allocator.
   432  // A possible previously used numeric identity for these labels can be passed
   433  // in as the 'oldNID' parameter; identity.InvalidIdentity must be passed if no
   434  // previous numeric identity exists.
   435  func (m *CachingIdentityAllocator) AllocateIdentity(ctx context.Context, lbls labels.Labels, notifyOwner bool, oldNID identity.NumericIdentity) (id *identity.Identity, allocated bool, err error) {
   436  	if !needsGlobalIdentity(lbls) {
   437  		return m.AllocateLocalIdentity(lbls, notifyOwner, oldNID)
   438  	}
   439  
   440  	if option.Config.Debug {
   441  		log.WithFields(logrus.Fields{
   442  			logfields.IdentityLabels: lbls.String(),
   443  		}).Debug("Resolving global identity")
   444  	}
   445  
   446  	// This will block until the kvstore can be accessed and all identities
   447  	// were successfully synced
   448  	err = m.WaitForInitialGlobalIdentities(ctx)
   449  	if err != nil {
   450  		return nil, false, err
   451  	}
   452  
   453  	if m.IdentityAllocator == nil {
   454  		return nil, false, fmt.Errorf("allocator not initialized")
   455  	}
   456  
   457  	idp, allocated, isNewLocally, err := m.IdentityAllocator.Allocate(ctx, &key.GlobalIdentity{LabelArray: lbls.LabelArray()})
   458  	if err != nil {
   459  		return nil, false, err
   460  	}
   461  	if idp > identity.MaxNumericIdentity {
   462  		return nil, false, fmt.Errorf("%d: numeric identity too large", idp)
   463  	}
   464  	id = identity.NewIdentity(identity.NumericIdentity(idp), lbls)
   465  
   466  	if option.Config.Debug {
   467  		log.WithFields(logrus.Fields{
   468  			logfields.Identity:       idp,
   469  			logfields.IdentityLabels: lbls.String(),
   470  			"isNew":                  allocated,
   471  			"isNewLocally":           isNewLocally,
   472  		}).Debug("Resolved identity")
   473  	}
   474  
   475  	if allocated || isNewLocally {
   476  		metrics.Identity.WithLabelValues(identity.ClusterLocalIdentityType).Inc()
   477  		for labelSource := range lbls.CollectSources() {
   478  			metrics.IdentityLabelSources.WithLabelValues(labelSource).Inc()
   479  		}
   480  	}
   481  
   482  	// Notify the owner of the newly added identities so that the
   483  	// cached identities can be updated ASAP, rather than just
   484  	// relying on the kv-store update events.
   485  	if allocated && notifyOwner {
   486  		added := identity.IdentityMap{
   487  			id.ID: id.LabelArray,
   488  		}
   489  		m.owner.UpdateIdentities(added, nil)
   490  	}
   491  
   492  	return id, allocated, nil
   493  }
   494  
   495  func (m *CachingIdentityAllocator) WithholdLocalIdentities(nids []identity.NumericIdentity) {
   496  	log.WithField(logfields.Identity, nids).Debug("Withholding numeric identities for later restoration")
   497  
   498  	// The allocators will return any identities that are not in-scope.
   499  	nids = m.localIdentities.withhold(nids)
   500  	nids = m.localNodeIdentities.withhold(nids)
   501  	if len(nids) > 0 {
   502  		log.WithField(logfields.Identity, nids).Error("Attempt to restore invalid numeric identities.")
   503  	}
   504  }
   505  
   506  func (m *CachingIdentityAllocator) UnwithholdLocalIdentities(nids []identity.NumericIdentity) {
   507  	log.WithField(logfields.Identity, nids).Debug("Unwithholding numeric identities")
   508  
   509  	// The allocators will ignore any identities that are not in-scope.
   510  	m.localIdentities.unwithhold(nids)
   511  	m.localNodeIdentities.unwithhold(nids)
   512  }
   513  
   514  // checkpoint writes the state of the local allocators to disk. This is used for restoration,
   515  // to ensure that numeric identities are, as much as possible, stable across agent restarts.
   516  //
   517  // Do not call this directly, rather, use m.checkpointTrigger.Trigger()
   518  func (m *CachingIdentityAllocator) checkpoint(ctx context.Context) error {
   519  	if m.checkpointPath == "" {
   520  		return nil // this is a unit test
   521  	}
   522  	log := log.WithField(logfields.Path, m.checkpointPath)
   523  
   524  	ids := make([]*identity.Identity, 0, m.localIdentities.size()+m.localNodeIdentities.size())
   525  	ids = m.localIdentities.checkpoint(ids)
   526  	ids = m.localNodeIdentities.checkpoint(ids)
   527  
   528  	// use renameio to prevent partial writes
   529  	out, err := renameio.NewPendingFile(m.checkpointPath, renameio.WithExistingPermissions(), renameio.WithPermissions(0o600))
   530  	if err != nil {
   531  		log.WithError(err).Error("failed to prepare checkpoint file")
   532  		return err
   533  	}
   534  	defer out.Cleanup()
   535  
   536  	jw := jsoniter.ConfigFastest.NewEncoder(out)
   537  	if err := jw.Encode(ids); err != nil {
   538  		log.WithError(err).Error("failed to marshal identity checkpoint state")
   539  		return err
   540  	}
   541  	if err := out.CloseAtomicallyReplace(); err != nil {
   542  		log.WithError(err).Error("failed to write identity checkpoint file")
   543  		return err
   544  	}
   545  	log.Debug("Wrote local identity allocator checkpoint")
   546  	return nil
   547  }
   548  
   549  // RestoreLocalIdentities reads in the checkpointed local allocator state
   550  // from disk and allocates a reference to every previously existing identity.
   551  //
   552  // Once all identity-allocating objects are synchronized (e.g. network policies,
   553  // remote nodes), call ReleaseRestoredIdentities to release the held references.
   554  func (m *CachingIdentityAllocator) RestoreLocalIdentities() (map[identity.NumericIdentity]*identity.Identity, error) {
   555  	if m.checkpointPath == "" {
   556  		return nil, nil // unit test
   557  	}
   558  	log := log.WithField(logfields.Path, m.checkpointPath)
   559  
   560  	// Read in checkpoint file
   561  	fp, err := os.Open(m.checkpointPath)
   562  	if err != nil {
   563  		if os.IsNotExist(err) {
   564  			log.Info("No identity checkpoint file found, skipping restoration")
   565  			return nil, nil
   566  		}
   567  		return nil, fmt.Errorf("failed to open identity checkpoint file %s: %w", m.checkpointPath, err)
   568  	}
   569  	defer fp.Close()
   570  
   571  	jr := jsoniter.ConfigFastest.NewDecoder(fp)
   572  	var ids []*identity.Identity
   573  	if err := jr.Decode(&ids); err != nil {
   574  		return nil, fmt.Errorf("failed to parse identity checkpoint file %s: %w", m.checkpointPath, err)
   575  	}
   576  
   577  	if len(ids) == 0 {
   578  		return nil, nil
   579  	}
   580  
   581  	// Load in checkpoint:
   582  	// - withhold numeric identities
   583  	// - allocate previous identities
   584  	// - update SelectorCache
   585  	// - unwithhold numeric IDs
   586  
   587  	log.WithField(logfields.Count, len(ids)).Info("Restoring checkpointed local identities")
   588  	m.restoredIdentities = make(map[identity.NumericIdentity]*identity.Identity, len(ids))
   589  	added := make(identity.IdentityMap, len(ids))
   590  
   591  	// Withhold restored local identities from allocation (except by request).
   592  	// This is insurance against a code change causing identities to be allocated
   593  	// differently, which could disrupt restoration.
   594  	// Withholding numeric IDs prevents them from being allocated except by explicit request.
   595  	oldNumIDs := make([]identity.NumericIdentity, 0, len(ids))
   596  	for _, id := range ids {
   597  		oldNumIDs = append(oldNumIDs, id.ID)
   598  	}
   599  	m.WithholdLocalIdentities(oldNumIDs)
   600  
   601  	for _, oldID := range ids {
   602  		// Ensure we do not restore any global identities or identities that somehow are
   603  		// changing scope. There's no point, as the numeric identity will be different.
   604  		if scope := identity.ScopeForLabels(oldID.Labels); scope != oldID.ID.Scope() || needsGlobalIdentity(oldID.Labels) {
   605  			// Should not happen, except when the scope for labels changes
   606  			// such as disabling policy-cidr-match-mode=nodes
   607  			log.WithFields(logrus.Fields{
   608  				logfields.Identity: oldID,
   609  				"scope":            scope,
   610  			}).Warn("skipping restore of non-local or re-scoped identity")
   611  			continue
   612  		}
   613  
   614  		newID, _, err := m.AllocateLocalIdentity(
   615  			oldID.Labels,
   616  			false,    // do not add to selector cache; we'll batch that later
   617  			oldID.ID, // request previous numeric ID
   618  		)
   619  		if err != nil {
   620  			log.WithError(err).WithField(logfields.Identity, oldID).Error("failed to restore checkpointed local identity, continuing")
   621  		} else {
   622  			m.restoredIdentities[newID.ID] = newID
   623  			added[newID.ID] = newID.LabelArray
   624  			if newID.ID != oldID.ID {
   625  				// Paranoia, shouldn't happen
   626  				log.WithField(logfields.Identity, newID).Warn("Restored local identity has different numeric ID")
   627  			}
   628  		}
   629  	}
   630  
   631  	// Add identities to SelectorCache
   632  	if m.owner != nil {
   633  		m.owner.UpdateIdentities(added, nil)
   634  	}
   635  
   636  	// Release all withheld numeric identities back for general use.
   637  	m.UnwithholdLocalIdentities(oldNumIDs)
   638  
   639  	// return the set of restored identities, which is useful for prefix restoration
   640  	return m.restoredIdentities, nil
   641  }
   642  
   643  // ReleaseRestoredIdentities releases any identities that were restored, reducing their reference
   644  // count and cleaning up as necessary.
   645  func (m *CachingIdentityAllocator) ReleaseRestoredIdentities() {
   646  	deleted := make(identity.IdentityMap, len(m.restoredIdentities))
   647  	for _, id := range m.restoredIdentities {
   648  		released, err := m.Release(context.Background(), id, false)
   649  		if err != nil {
   650  			// This should never happen; these IDs are local
   651  			log.WithError(err).WithField(logfields.Identity, id).Error("failed to release restored identity")
   652  			continue
   653  		}
   654  		if option.Config.Debug {
   655  			log.WithFields(logrus.Fields{
   656  				logfields.Identity: id,
   657  				"released":         released,
   658  			}).Debug("Released restored identity reference")
   659  		}
   660  		if released {
   661  			deleted[id.ID] = id.LabelArray
   662  		}
   663  	}
   664  
   665  	if len(deleted) > 0 && m.owner != nil {
   666  		m.owner.UpdateIdentities(nil, deleted)
   667  	}
   668  
   669  	m.restoredIdentities = nil // free memory
   670  }
   671  
   672  // Release is the reverse operation of AllocateIdentity() and releases the
   673  // identity again. This function may result in kvstore operations.
   674  // After the last user has released the ID, the returned lastUse value is true.
   675  func (m *CachingIdentityAllocator) Release(ctx context.Context, id *identity.Identity, notifyOwner bool) (released bool, err error) {
   676  	defer func() {
   677  		if released {
   678  			// decrement metrics, trigger checkpoint if local
   679  			metricVal := identity.ClusterLocalIdentityType
   680  			switch id.ID.Scope() {
   681  			case identity.IdentityScopeLocal:
   682  				metricVal = identity.NodeLocalIdentityType
   683  			case identity.IdentityScopeRemoteNode:
   684  				metricVal = identity.RemoteNodeIdentityType
   685  			}
   686  			if metricVal != identity.ClusterLocalIdentityType && m.checkpointTrigger != nil {
   687  				m.checkpointTrigger.Trigger()
   688  			}
   689  			for labelSource := range id.Labels.CollectSources() {
   690  				metrics.IdentityLabelSources.WithLabelValues(labelSource).Dec()
   691  			}
   692  			metrics.Identity.WithLabelValues(metricVal).Dec()
   693  		}
   694  
   695  		if m.owner != nil && released && notifyOwner {
   696  			deleted := identity.IdentityMap{
   697  				id.ID: id.LabelArray,
   698  			}
   699  			m.owner.UpdateIdentities(nil, deleted)
   700  		}
   701  	}()
   702  
   703  	// Ignore reserved identities.
   704  	if id.IsReserved() {
   705  		return false, nil
   706  	}
   707  
   708  	switch identity.ScopeForLabels(id.Labels) {
   709  	case identity.IdentityScopeLocal:
   710  		return m.localIdentities.release(id, notifyOwner), nil
   711  	case identity.IdentityScopeRemoteNode:
   712  		return m.localNodeIdentities.release(id, notifyOwner), nil
   713  	}
   714  
   715  	// This will block until the kvstore can be accessed and all identities
   716  	// were successfully synced
   717  	err = m.WaitForInitialGlobalIdentities(ctx)
   718  	if err != nil {
   719  		return false, err
   720  	}
   721  
   722  	if m.IdentityAllocator == nil {
   723  		return false, fmt.Errorf("allocator not initialized")
   724  	}
   725  
   726  	// Rely on the eventual Kv-Store events for delete
   727  	// notifications of kv-store allocated identities. Even if an
   728  	// ID is no longer used locally, it may still be used by
   729  	// remote nodes, so we can't rely on the locally computed
   730  	// "lastUse".
   731  	return m.IdentityAllocator.Release(ctx, &key.GlobalIdentity{LabelArray: id.LabelArray})
   732  }
   733  
   734  // WatchRemoteIdentities returns a RemoteCache instance which can be later
   735  // started to watch identities in another kvstore and sync them to the local
   736  // identity cache. remoteName should be unique unless replacing an existing
   737  // remote's backend. When cachedPrefix is set, identities are assumed to be
   738  // stored under the "cilium/cache" prefix, and the watcher is adapted accordingly.
   739  func (m *CachingIdentityAllocator) WatchRemoteIdentities(remoteName string, remoteID uint32, backend kvstore.BackendOperations, cachedPrefix bool) (*allocator.RemoteCache, error) {
   740  	<-m.globalIdentityAllocatorInitialized
   741  
   742  	prefix := m.identitiesPath
   743  	if cachedPrefix {
   744  		prefix = path.Join(kvstore.StateToCachePrefix(prefix), remoteName)
   745  	}
   746  
   747  	remoteAllocatorBackend, err := kvstoreallocator.NewKVStoreBackend(prefix, m.owner.GetNodeSuffix(), &key.GlobalIdentity{}, backend)
   748  	if err != nil {
   749  		return nil, fmt.Errorf("error setting up remote allocator backend: %w", err)
   750  	}
   751  
   752  	remoteAlloc, err := allocator.NewAllocator(&key.GlobalIdentity{}, remoteAllocatorBackend,
   753  		allocator.WithEvents(m.IdentityAllocator.GetEvents()), allocator.WithoutGC(), allocator.WithoutAutostart(),
   754  		allocator.WithCacheValidator(clusterIDValidator(remoteID)),
   755  		allocator.WithCacheValidator(clusterNameValidator(remoteName)),
   756  	)
   757  	if err != nil {
   758  		return nil, fmt.Errorf("unable to initialize remote Identity Allocator: %w", err)
   759  	}
   760  
   761  	return m.IdentityAllocator.NewRemoteCache(remoteName, remoteAlloc), nil
   762  }
   763  
   764  func (m *CachingIdentityAllocator) RemoveRemoteIdentities(name string) {
   765  	if m.IdentityAllocator != nil {
   766  		m.IdentityAllocator.RemoveRemoteKVStore(name)
   767  	}
   768  }
   769  
   770  type IdentityChangeKind string
   771  
   772  const (
   773  	IdentityChangeSync   IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeSync)
   774  	IdentityChangeUpsert IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeUpsert)
   775  	IdentityChangeDelete IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeDelete)
   776  )
   777  
// IdentityChange is the event delivered to observers of the
// CachingIdentityAllocator. It is derived from an allocator.AllocatorChange.
type IdentityChange struct {
	// Kind is the kind of change, converted from the allocator change kind.
	Kind   IdentityChangeKind
	// ID is the numeric identity the change refers to.
	ID     identity.NumericIdentity
	// Labels are the labels parsed from the allocator key of the change;
	// nil when the change carries no key.
	Labels labels.Labels
}
   783  
   784  // Observe the identity changes. Conforms to stream.Observable.
   785  // Replays the current state of the cache when subscribing.
   786  func (m *CachingIdentityAllocator) Observe(ctx context.Context, next func(IdentityChange), complete func(error)) {
   787  	// This short-lived go routine serves the purpose of waiting for the global identity allocator becoming ready
   788  	// before starting to observe the underlying allocator for changes.
   789  	// m.IdentityAllocator is backed by a stream.FuncObservable, that will start its own
   790  	// go routine. Therefore, the current go routine will stop and free the lock on the setupMutex after the registration.
   791  	go func() {
   792  		if err := m.WaitForInitialGlobalIdentities(ctx); err != nil {
   793  			complete(ctx.Err())
   794  			return
   795  		}
   796  
   797  		m.setupMutex.Lock()
   798  		defer m.setupMutex.Unlock()
   799  
   800  		if m.IdentityAllocator == nil {
   801  			complete(errors.New("allocator no longer initialized"))
   802  			return
   803  		}
   804  
   805  		// Observe the underlying allocator for changes and map the events to identities.
   806  		stream.Map[allocator.AllocatorChange, IdentityChange](
   807  			m.IdentityAllocator,
   808  			func(change allocator.AllocatorChange) IdentityChange {
   809  				return IdentityChange{
   810  					Kind:   IdentityChangeKind(change.Kind),
   811  					ID:     identity.NumericIdentity(change.ID),
   812  					Labels: mapLabels(change.Key),
   813  				}
   814  			},
   815  		).Observe(ctx, next, complete)
   816  	}()
   817  }
   818  
   819  func mapLabels(allocatorKey allocator.AllocatorKey) labels.Labels {
   820  	var idLabels labels.Labels = nil
   821  
   822  	if allocatorKey != nil {
   823  		idLabels = labels.Labels{}
   824  		for k, v := range allocatorKey.GetAsMap() {
   825  			label := labels.ParseLabel(k + "=" + v)
   826  			idLabels[label.Key] = label
   827  		}
   828  	}
   829  
   830  	return idLabels
   831  }
   832  
   833  // clusterIDValidator returns a validator ensuring that the identity ID belongs
   834  // to the ClusterID range.
   835  func clusterIDValidator(clusterID uint32) allocator.CacheValidator {
   836  	min := idpool.ID(identity.GetMinimalAllocationIdentity(clusterID))
   837  	max := idpool.ID(identity.GetMaximumAllocationIdentity(clusterID))
   838  
   839  	return func(_ allocator.AllocatorChangeKind, id idpool.ID, _ allocator.AllocatorKey) error {
   840  		if id < min || id > max {
   841  			return fmt.Errorf("ID %d does not belong to the allocation range of cluster ID %d", id, clusterID)
   842  		}
   843  		return nil
   844  	}
   845  }
   846  
   847  // clusterNameValidator returns a validator ensuring that the identity labels
   848  // include the one specifying the correct cluster name.
   849  func clusterNameValidator(clusterName string) allocator.CacheValidator {
   850  	return func(kind allocator.AllocatorChangeKind, _ idpool.ID, ak allocator.AllocatorKey) error {
   851  		if kind != allocator.AllocatorChangeUpsert {
   852  			// Don't filter out deletion events, as labels may not be propagated,
   853  			// and to prevent leaving stale identities behind.
   854  			return nil
   855  		}
   856  
   857  		gi, ok := ak.(*key.GlobalIdentity)
   858  		if !ok {
   859  			return fmt.Errorf("unsupported key type %T", ak)
   860  		}
   861  
   862  		var found bool
   863  		for _, lbl := range gi.LabelArray {
   864  			if lbl.Key != api.PolicyLabelCluster {
   865  				continue
   866  			}
   867  
   868  			switch {
   869  			case lbl.Source != labels.LabelSourceK8s:
   870  				return fmt.Errorf("unexpected source for cluster label: got %s, expected %s", lbl.Source, labels.LabelSourceK8s)
   871  			case lbl.Value != clusterName:
   872  				return fmt.Errorf("unexpected cluster name: got %s, expected %s", lbl.Value, clusterName)
   873  			default:
   874  				found = true
   875  			}
   876  		}
   877  
   878  		if !found {
   879  			return fmt.Errorf("could not find expected label %s", api.PolicyLabelCluster)
   880  		}
   881  
   882  		return nil
   883  	}
   884  }