// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package cache

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path"
	"path/filepath"

	"github.com/cilium/stream"
	"github.com/google/renameio/v2"
	jsoniter "github.com/json-iterator/go"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/allocator"
	"github.com/cilium/cilium/pkg/controller"
	"github.com/cilium/cilium/pkg/identity"
	"github.com/cilium/cilium/pkg/identity/key"
	"github.com/cilium/cilium/pkg/idpool"
	api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io"
	clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
	"github.com/cilium/cilium/pkg/k8s/identitybackend"
	"github.com/cilium/cilium/pkg/kvstore"
	kvstoreallocator "github.com/cilium/cilium/pkg/kvstore/allocator"
	"github.com/cilium/cilium/pkg/labels"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/time"
	"github.com/cilium/cilium/pkg/trigger"
)

var (
	// IdentitiesPath is the path to where identities are stored in the
	// key-value store.
	IdentitiesPath = path.Join(kvstore.BaseKeyPrefix, "state", "identities", "v1")
)

// CheckpointFile is the filename of the local allocator checkpoint. The
// checkpoint is periodically written, and restored on restart.
// The full path is, by default, /run/cilium/state/local_allocator_state.json
const CheckpointFile = "local_allocator_state.json"

// CachingIdentityAllocator manages the allocation of identities for both
// global and local identities.
type CachingIdentityAllocator struct {
	// IdentityAllocator is an allocator for security identities from the
	// kvstore. It is nil until the asynchronous part of
	// InitIdentityAllocator() has completed, and is reset to nil by Close().
	IdentityAllocator *allocator.Allocator

	// globalIdentityAllocatorInitialized is closed whenever the global identity
	// allocator is initialized. Close() replaces it with a fresh channel.
	globalIdentityAllocatorInitialized chan struct{}

	// localIdentities is the cache for identities in identity.IdentityScopeLocal.
	localIdentities *localIdentityCache

	// localNodeIdentities is the cache for identities in
	// identity.IdentityScopeRemoteNode.
	localNodeIdentities *localIdentityCache

	// identitiesPath is the kvstore prefix handed to the kvstore backend;
	// it is initialized from IdentitiesPath.
	identitiesPath string

	// This field exists to hand out references that are either for sending
	// or receiving. It should not be used directly without converting it first
	// to a AllocatorEventSendChan or AllocatorEventRecvChan.
	events  allocator.AllocatorEventChan
	watcher identityWatcher

	// setupMutex synchronizes InitIdentityAllocator() and Close()
	setupMutex lock.Mutex

	// owner is notified of identity changes via UpdateIdentities().
	owner IdentityAllocatorOwner

	// checkpointTrigger schedules checkpoint(); it is nil unless
	// EnableCheckpointing() was called. triggerDone is closed once the
	// trigger's shutdown function has finished.
	checkpointTrigger *trigger.Trigger
	triggerDone       <-chan struct{}

	// restoredIdentities are the set of identities read in from a
	// checkpoint on startup. These should be released, see `RestoreLocalIdentities()`
	// and `ReleaseRestoredIdentities()` for more info.
	restoredIdentities map[identity.NumericIdentity]*identity.Identity

	// checkpointPath is the file where local allocator state should be checkpointed.
	// The default is /run/cilium/state/local_allocator_state.json. It is left
	// empty (which disables checkpointing) when option.Config.RunDir is empty.
	checkpointPath string
}

// IdentityAllocatorOwner is the interface the owner of an identity allocator
// must implement
type IdentityAllocatorOwner interface {
	// UpdateIdentities will be called when identities have changed
	//
	// The caller is responsible for making sure the same identity
	// is not present in both 'added' and 'deleted', so that they
	// can be processed in either order.
	UpdateIdentities(added, deleted identity.IdentityMap)

	// GetNodeSuffix must return the node specific suffix to use
	GetNodeSuffix() string
}

// IdentityAllocator is any type which is responsible for allocating security
// identities based of sets of labels, and caching information about identities
// locally.
type IdentityAllocator interface {
	// Identity changes are observable.
	stream.Observable[IdentityChange]

	// WaitForInitialGlobalIdentities waits for the initial set of global
	// security identities to have been received.
	WaitForInitialGlobalIdentities(context.Context) error

	// AllocateIdentity allocates an identity described by the specified labels.
	// A possible previously used numeric identity for these labels can be passed
	// in as the last parameter; identity.InvalidIdentity must be passed if no
	// previous numeric identity exists.
	// The bool parameter is named notifyOwner in the CachingIdentityAllocator
	// implementation; the bool result reports whether an identity was allocated.
	AllocateIdentity(context.Context, labels.Labels, bool, identity.NumericIdentity) (*identity.Identity, bool, error)

	// Release is the reverse operation of AllocateIdentity() and releases the
	// specified identity.
	Release(context.Context, *identity.Identity, bool) (released bool, err error)

	// LookupIdentity returns the identity that corresponds to the given
	// labels.
	LookupIdentity(ctx context.Context, lbls labels.Labels) *identity.Identity

	// LookupIdentityByID returns the identity that corresponds to the given
	// numeric identity.
	LookupIdentityByID(ctx context.Context, id identity.NumericIdentity) *identity.Identity

	// GetIdentityCache returns the current cache of identities that the
	// allocator has allocated. The caller should not modify the resulting
	// identities by pointer.
	GetIdentityCache() identity.IdentityMap

	// GetIdentities returns a copy of the current cache of identities.
	GetIdentities() IdentitiesModel

	// WithholdLocalIdentities holds a set of numeric identities out of the local
	// allocation pool(s). Once withheld, a numeric identity can only be used
	// when explicitly requested via AllocateIdentity(..., oldNID).
	WithholdLocalIdentities(nids []identity.NumericIdentity)

	// UnwithholdLocalIdentities removes numeric identities from the withheld set,
	// freeing them for general allocation.
	UnwithholdLocalIdentities(nids []identity.NumericIdentity)
}

// InitIdentityAllocator creates the global identity allocator. Calling it
// again once initialization has completed, without an intervening Close(),
// panics. The Caller must have initialized well known identities before
// calling this (by calling identity.InitWellKnownIdentities()).
// The client is only used by the CRD identity allocator currently.
// Returns a channel which is closed when initialization of the allocator is
// completed.
// TODO: identity backends are initialized directly in this function, pulling
// in dependencies on kvstore and k8s. It would be better to decouple this,
// since the backends are an interface.
161 func (m *CachingIdentityAllocator) InitIdentityAllocator(client clientset.Interface) <-chan struct{} { 162 m.setupMutex.Lock() 163 defer m.setupMutex.Unlock() 164 165 if m.IdentityAllocator != nil { 166 log.Panic("InitIdentityAllocator() in succession without calling Close()") 167 } 168 169 log.Info("Initializing identity allocator") 170 171 minID := idpool.ID(identity.GetMinimalAllocationIdentity(option.Config.ClusterID)) 172 maxID := idpool.ID(identity.GetMaximumAllocationIdentity(option.Config.ClusterID)) 173 174 log.WithFields(map[string]interface{}{ 175 "min": minID, 176 "max": maxID, 177 "cluster-id": option.Config.ClusterID, 178 }).Info("Allocating identities between range") 179 180 // In the case of the allocator being closed, we need to create a new events channel 181 // and start a new watch. 182 if m.events == nil { 183 m.events = make(allocator.AllocatorEventChan, eventsQueueSize) 184 m.watcher.watch(m.events) 185 } 186 187 // Asynchronously set up the global identity allocator since it connects 188 // to the kvstore. 
189 go func(owner IdentityAllocatorOwner, events allocator.AllocatorEventSendChan, minID, maxID idpool.ID) { 190 m.setupMutex.Lock() 191 defer m.setupMutex.Unlock() 192 193 var ( 194 backend allocator.Backend 195 err error 196 ) 197 198 switch option.Config.IdentityAllocationMode { 199 case option.IdentityAllocationModeKVstore: 200 log.Debug("Identity allocation backed by KVStore") 201 backend, err = kvstoreallocator.NewKVStoreBackend(m.identitiesPath, owner.GetNodeSuffix(), &key.GlobalIdentity{}, kvstore.Client()) 202 if err != nil { 203 log.WithError(err).Fatal("Unable to initialize kvstore backend for identity allocation") 204 } 205 206 case option.IdentityAllocationModeCRD: 207 log.Debug("Identity allocation backed by CRD") 208 backend, err = identitybackend.NewCRDBackend(identitybackend.CRDBackendConfiguration{ 209 Store: nil, 210 Client: client, 211 KeyFunc: (&key.GlobalIdentity{}).PutKeyFromMap, 212 }) 213 if err != nil { 214 log.WithError(err).Fatal("Unable to initialize Kubernetes CRD backend for identity allocation") 215 } 216 217 default: 218 log.Fatalf("Unsupported identity allocation mode %s", option.Config.IdentityAllocationMode) 219 } 220 221 a, err := allocator.NewAllocator(&key.GlobalIdentity{}, backend, 222 allocator.WithMax(maxID), allocator.WithMin(minID), 223 allocator.WithEvents(events), 224 allocator.WithMasterKeyProtection(), 225 allocator.WithPrefixMask(idpool.ID(option.Config.ClusterID<<identity.GetClusterIDShift()))) 226 if err != nil { 227 log.WithError(err).Fatalf("Unable to initialize Identity Allocator with backend %s", option.Config.IdentityAllocationMode) 228 } 229 230 m.IdentityAllocator = a 231 close(m.globalIdentityAllocatorInitialized) 232 }(m.owner, m.events, minID, maxID) 233 234 return m.globalIdentityAllocatorInitialized 235 } 236 237 // EnableCheckpointing enables checkpointing the local allocator state. 
238 // The CachingIdentityAllocator is used in multiple places, but we only want to 239 // checkpoint the "primary" allocator 240 func (m *CachingIdentityAllocator) EnableCheckpointing() { 241 controllerManager := controller.NewManager() 242 controllerGroup := controller.NewGroup("identity-allocator") 243 controllerName := "local-identity-checkpoint" 244 triggerDone := make(chan struct{}) 245 t, _ := trigger.NewTrigger(trigger.Parameters{ 246 MinInterval: 10 * time.Second, 247 TriggerFunc: func(reasons []string) { 248 controllerManager.UpdateController(controllerName, controller.ControllerParams{ 249 Group: controllerGroup, 250 DoFunc: m.checkpoint, 251 StopFunc: m.checkpoint, // perform one last checkpoint when the controller is removed 252 }) 253 }, 254 ShutdownFunc: func() { 255 controllerManager.RemoveControllerAndWait(controllerName) // waits for StopFunc 256 close(triggerDone) 257 }, 258 }) 259 260 m.checkpointTrigger = t 261 m.triggerDone = triggerDone 262 } 263 264 const eventsQueueSize = 1024 265 266 // InitIdentityAllocator creates the identity allocator. Only the first 267 // invocation of this function will have an effect. The Caller must have 268 // initialized well known identities before calling this (by calling 269 // identity.InitWellKnownIdentities()). 270 // client and identityStore are only used by the CRD identity allocator, 271 // currently, and identityStore may be nil. 272 // Returns a channel which is closed when initialization of the allocator is 273 // completed. 274 // TODO: identity backends are initialized directly in this function, pulling 275 // in dependencies on kvstore and k8s. It would be better to decouple this, 276 // since the backends are an interface. 277 278 // NewCachingIdentityAllocator creates a new instance of an 279 // CachingIdentityAllocator. 
280 func NewCachingIdentityAllocator(owner IdentityAllocatorOwner) *CachingIdentityAllocator { 281 watcher := identityWatcher{ 282 owner: owner, 283 } 284 285 m := &CachingIdentityAllocator{ 286 globalIdentityAllocatorInitialized: make(chan struct{}), 287 owner: owner, 288 identitiesPath: IdentitiesPath, 289 watcher: watcher, 290 events: make(allocator.AllocatorEventChan, eventsQueueSize), 291 } 292 if option.Config.RunDir != "" { // disable checkpointing if this is a unit test 293 m.checkpointPath = filepath.Join(option.Config.StateDir, CheckpointFile) 294 } 295 m.watcher.watch(m.events) 296 297 // Local identity cache can be created synchronously since it doesn't 298 // rely upon any external resources (e.g., external kvstore). 299 m.localIdentities = newLocalIdentityCache(identity.IdentityScopeLocal, identity.MinAllocatorLocalIdentity, identity.MaxAllocatorLocalIdentity, m.events) 300 m.localNodeIdentities = newLocalIdentityCache(identity.IdentityScopeRemoteNode, identity.MinAllocatorLocalIdentity, identity.MaxAllocatorLocalIdentity, m.events) 301 302 return m 303 } 304 305 // Close closes the identity allocator 306 func (m *CachingIdentityAllocator) Close() { 307 m.setupMutex.Lock() 308 defer m.setupMutex.Unlock() 309 310 if m.checkpointTrigger != nil { 311 m.checkpointTrigger.Shutdown() 312 <-m.triggerDone 313 m.checkpointTrigger = nil 314 } 315 316 select { 317 case <-m.globalIdentityAllocatorInitialized: 318 // This means the channel was closed and therefore the IdentityAllocator == nil will never be true 319 default: 320 if m.IdentityAllocator == nil { 321 log.Error("Close() called without calling InitIdentityAllocator() first") 322 return 323 } 324 } 325 326 m.IdentityAllocator.Delete() 327 if m.events != nil { 328 m.localIdentities.close() 329 m.localNodeIdentities.close() 330 close(m.events) 331 m.events = nil 332 } 333 334 m.IdentityAllocator = nil 335 m.globalIdentityAllocatorInitialized = make(chan struct{}) 336 } 337 338 // 
WaitForInitialGlobalIdentities waits for the initial set of global security 339 // identities to have been received and populated into the allocator cache. 340 func (m *CachingIdentityAllocator) WaitForInitialGlobalIdentities(ctx context.Context) error { 341 select { 342 case <-m.globalIdentityAllocatorInitialized: 343 case <-ctx.Done(): 344 return fmt.Errorf("initial global identity sync was cancelled: %w", ctx.Err()) 345 } 346 347 return m.IdentityAllocator.WaitForInitialSync(ctx) 348 } 349 350 var ErrNonLocalIdentity = fmt.Errorf("labels would result in global identity") 351 352 // AllocateLocalIdentity works the same as AllocateIdentity, but it guarantees that the allocated 353 // identity will be local-only. If the provided set of labels does not map to a local identity scope, 354 // this will return an error. 355 func (m *CachingIdentityAllocator) AllocateLocalIdentity(lbls labels.Labels, notifyOwner bool, oldNID identity.NumericIdentity) (id *identity.Identity, allocated bool, err error) { 356 357 // If this is a reserved, pre-allocated identity, just return that and be done 358 if reservedIdentity := identity.LookupReservedIdentityByLabels(lbls); reservedIdentity != nil { 359 if option.Config.Debug { 360 log.WithFields(logrus.Fields{ 361 logfields.Identity: reservedIdentity.ID, 362 logfields.IdentityLabels: lbls.String(), 363 "isNew": false, 364 }).Debug("Resolving reserved identity") 365 } 366 return reservedIdentity, false, nil 367 } 368 369 if option.Config.Debug { 370 log.WithFields(logrus.Fields{ 371 logfields.IdentityLabels: lbls.String(), 372 }).Debug("Resolving local identity") 373 } 374 375 // Allocate according to scope 376 var metricLabel string 377 switch scope := identity.ScopeForLabels(lbls); scope { 378 case identity.IdentityScopeLocal: 379 id, allocated, err = m.localIdentities.lookupOrCreate(lbls, oldNID, notifyOwner) 380 metricLabel = identity.NodeLocalIdentityType 381 case identity.IdentityScopeRemoteNode: 382 id, allocated, err = 
m.localNodeIdentities.lookupOrCreate(lbls, oldNID, notifyOwner) 383 metricLabel = identity.RemoteNodeIdentityType 384 default: 385 log.WithFields(logrus.Fields{ 386 logfields.Labels: lbls, 387 "scope": scope, 388 }).Error("BUG: attempt to allocate local identity for labels, but a global identity is required") 389 return nil, false, ErrNonLocalIdentity 390 } 391 if err != nil { 392 return nil, false, err 393 } 394 395 if allocated { 396 metrics.Identity.WithLabelValues(metricLabel).Inc() 397 for labelSource := range lbls.CollectSources() { 398 metrics.IdentityLabelSources.WithLabelValues(labelSource).Inc() 399 } 400 401 if m.checkpointTrigger != nil { 402 m.checkpointTrigger.Trigger() 403 } 404 405 if notifyOwner { 406 added := identity.IdentityMap{ 407 id.ID: id.LabelArray, 408 } 409 m.owner.UpdateIdentities(added, nil) 410 } 411 } 412 413 return 414 } 415 416 // needsGlobalIdentity returns true if these labels require 417 // allocating a global identity 418 func needsGlobalIdentity(lbls labels.Labels) bool { 419 // If lbls corresponds to a reserved identity, no global allocation required 420 if identity.LookupReservedIdentityByLabels(lbls) != nil { 421 return false 422 } 423 424 // determine identity scope from labels, 425 return identity.ScopeForLabels(lbls) == identity.IdentityScopeGlobal 426 } 427 428 // AllocateIdentity allocates an identity described by the specified labels. If 429 // an identity for the specified set of labels already exist, the identity is 430 // re-used and reference counting is performed, otherwise a new identity is 431 // allocated via the kvstore or via the local identity allocator. 432 // A possible previously used numeric identity for these labels can be passed 433 // in as the 'oldNID' parameter; identity.InvalidIdentity must be passed if no 434 // previous numeric identity exists. 
435 func (m *CachingIdentityAllocator) AllocateIdentity(ctx context.Context, lbls labels.Labels, notifyOwner bool, oldNID identity.NumericIdentity) (id *identity.Identity, allocated bool, err error) { 436 if !needsGlobalIdentity(lbls) { 437 return m.AllocateLocalIdentity(lbls, notifyOwner, oldNID) 438 } 439 440 if option.Config.Debug { 441 log.WithFields(logrus.Fields{ 442 logfields.IdentityLabels: lbls.String(), 443 }).Debug("Resolving global identity") 444 } 445 446 // This will block until the kvstore can be accessed and all identities 447 // were successfully synced 448 err = m.WaitForInitialGlobalIdentities(ctx) 449 if err != nil { 450 return nil, false, err 451 } 452 453 if m.IdentityAllocator == nil { 454 return nil, false, fmt.Errorf("allocator not initialized") 455 } 456 457 idp, allocated, isNewLocally, err := m.IdentityAllocator.Allocate(ctx, &key.GlobalIdentity{LabelArray: lbls.LabelArray()}) 458 if err != nil { 459 return nil, false, err 460 } 461 if idp > identity.MaxNumericIdentity { 462 return nil, false, fmt.Errorf("%d: numeric identity too large", idp) 463 } 464 id = identity.NewIdentity(identity.NumericIdentity(idp), lbls) 465 466 if option.Config.Debug { 467 log.WithFields(logrus.Fields{ 468 logfields.Identity: idp, 469 logfields.IdentityLabels: lbls.String(), 470 "isNew": allocated, 471 "isNewLocally": isNewLocally, 472 }).Debug("Resolved identity") 473 } 474 475 if allocated || isNewLocally { 476 metrics.Identity.WithLabelValues(identity.ClusterLocalIdentityType).Inc() 477 for labelSource := range lbls.CollectSources() { 478 metrics.IdentityLabelSources.WithLabelValues(labelSource).Inc() 479 } 480 } 481 482 // Notify the owner of the newly added identities so that the 483 // cached identities can be updated ASAP, rather than just 484 // relying on the kv-store update events. 
485 if allocated && notifyOwner { 486 added := identity.IdentityMap{ 487 id.ID: id.LabelArray, 488 } 489 m.owner.UpdateIdentities(added, nil) 490 } 491 492 return id, allocated, nil 493 } 494 495 func (m *CachingIdentityAllocator) WithholdLocalIdentities(nids []identity.NumericIdentity) { 496 log.WithField(logfields.Identity, nids).Debug("Withholding numeric identities for later restoration") 497 498 // The allocators will return any identities that are not in-scope. 499 nids = m.localIdentities.withhold(nids) 500 nids = m.localNodeIdentities.withhold(nids) 501 if len(nids) > 0 { 502 log.WithField(logfields.Identity, nids).Error("Attempt to restore invalid numeric identities.") 503 } 504 } 505 506 func (m *CachingIdentityAllocator) UnwithholdLocalIdentities(nids []identity.NumericIdentity) { 507 log.WithField(logfields.Identity, nids).Debug("Unwithholding numeric identities") 508 509 // The allocators will ignore any identities that are not in-scope. 510 m.localIdentities.unwithhold(nids) 511 m.localNodeIdentities.unwithhold(nids) 512 } 513 514 // checkpoint writes the state of the local allocators to disk. This is used for restoration, 515 // to ensure that numeric identities are, as much as possible, stable across agent restarts. 
516 // 517 // Do not call this directly, rather, use m.checkpointTrigger.Trigger() 518 func (m *CachingIdentityAllocator) checkpoint(ctx context.Context) error { 519 if m.checkpointPath == "" { 520 return nil // this is a unit test 521 } 522 log := log.WithField(logfields.Path, m.checkpointPath) 523 524 ids := make([]*identity.Identity, 0, m.localIdentities.size()+m.localNodeIdentities.size()) 525 ids = m.localIdentities.checkpoint(ids) 526 ids = m.localNodeIdentities.checkpoint(ids) 527 528 // use renameio to prevent partial writes 529 out, err := renameio.NewPendingFile(m.checkpointPath, renameio.WithExistingPermissions(), renameio.WithPermissions(0o600)) 530 if err != nil { 531 log.WithError(err).Error("failed to prepare checkpoint file") 532 return err 533 } 534 defer out.Cleanup() 535 536 jw := jsoniter.ConfigFastest.NewEncoder(out) 537 if err := jw.Encode(ids); err != nil { 538 log.WithError(err).Error("failed to marshal identity checkpoint state") 539 return err 540 } 541 if err := out.CloseAtomicallyReplace(); err != nil { 542 log.WithError(err).Error("failed to write identity checkpoint file") 543 return err 544 } 545 log.Debug("Wrote local identity allocator checkpoint") 546 return nil 547 } 548 549 // RestoreLocalIdentities reads in the checkpointed local allocator state 550 // from disk and allocates a reference to every previously existing identity. 551 // 552 // Once all identity-allocating objects are synchronized (e.g. network policies, 553 // remote nodes), call ReleaseRestoredIdentities to release the held references. 
554 func (m *CachingIdentityAllocator) RestoreLocalIdentities() (map[identity.NumericIdentity]*identity.Identity, error) { 555 if m.checkpointPath == "" { 556 return nil, nil // unit test 557 } 558 log := log.WithField(logfields.Path, m.checkpointPath) 559 560 // Read in checkpoint file 561 fp, err := os.Open(m.checkpointPath) 562 if err != nil { 563 if os.IsNotExist(err) { 564 log.Info("No identity checkpoint file found, skipping restoration") 565 return nil, nil 566 } 567 return nil, fmt.Errorf("failed to open identity checkpoint file %s: %w", m.checkpointPath, err) 568 } 569 defer fp.Close() 570 571 jr := jsoniter.ConfigFastest.NewDecoder(fp) 572 var ids []*identity.Identity 573 if err := jr.Decode(&ids); err != nil { 574 return nil, fmt.Errorf("failed to parse identity checkpoint file %s: %w", m.checkpointPath, err) 575 } 576 577 if len(ids) == 0 { 578 return nil, nil 579 } 580 581 // Load in checkpoint: 582 // - withhold numeric identities 583 // - allocate previous identities 584 // - update SelectorCache 585 // - unwithhold numeric IDs 586 587 log.WithField(logfields.Count, len(ids)).Info("Restoring checkpointed local identities") 588 m.restoredIdentities = make(map[identity.NumericIdentity]*identity.Identity, len(ids)) 589 added := make(identity.IdentityMap, len(ids)) 590 591 // Withhold restored local identities from allocation (except by request). 592 // This is insurance against a code change causing identities to be allocated 593 // differently, which could disrupt restoration. 594 // Withholding numeric IDs prevents them from being allocated except by explicit request. 595 oldNumIDs := make([]identity.NumericIdentity, 0, len(ids)) 596 for _, id := range ids { 597 oldNumIDs = append(oldNumIDs, id.ID) 598 } 599 m.WithholdLocalIdentities(oldNumIDs) 600 601 for _, oldID := range ids { 602 // Ensure we do not restore any global identities or identities that somehow are 603 // changing scope. There's no point, as the numeric identity will be different. 
604 if scope := identity.ScopeForLabels(oldID.Labels); scope != oldID.ID.Scope() || needsGlobalIdentity(oldID.Labels) { 605 // Should not happen, except when the scope for labels changes 606 // such as disabling policy-cidr-match-mode=nodes 607 log.WithFields(logrus.Fields{ 608 logfields.Identity: oldID, 609 "scope": scope, 610 }).Warn("skipping restore of non-local or re-scoped identity") 611 continue 612 } 613 614 newID, _, err := m.AllocateLocalIdentity( 615 oldID.Labels, 616 false, // do not add to selector cache; we'll batch that later 617 oldID.ID, // request previous numeric ID 618 ) 619 if err != nil { 620 log.WithError(err).WithField(logfields.Identity, oldID).Error("failed to restore checkpointed local identity, continuing") 621 } else { 622 m.restoredIdentities[newID.ID] = newID 623 added[newID.ID] = newID.LabelArray 624 if newID.ID != oldID.ID { 625 // Paranoia, shouldn't happen 626 log.WithField(logfields.Identity, newID).Warn("Restored local identity has different numeric ID") 627 } 628 } 629 } 630 631 // Add identities to SelectorCache 632 if m.owner != nil { 633 m.owner.UpdateIdentities(added, nil) 634 } 635 636 // Release all withheld numeric identities back for general use. 637 m.UnwithholdLocalIdentities(oldNumIDs) 638 639 // return the set of restored identities, which is useful for prefix restoration 640 return m.restoredIdentities, nil 641 } 642 643 // ReleaseRestoredIdentities releases any identities that were restored, reducing their reference 644 // count and cleaning up as necessary. 
645 func (m *CachingIdentityAllocator) ReleaseRestoredIdentities() { 646 deleted := make(identity.IdentityMap, len(m.restoredIdentities)) 647 for _, id := range m.restoredIdentities { 648 released, err := m.Release(context.Background(), id, false) 649 if err != nil { 650 // This should never happen; these IDs are local 651 log.WithError(err).WithField(logfields.Identity, id).Error("failed to release restored identity") 652 continue 653 } 654 if option.Config.Debug { 655 log.WithFields(logrus.Fields{ 656 logfields.Identity: id, 657 "released": released, 658 }).Debug("Released restored identity reference") 659 } 660 if released { 661 deleted[id.ID] = id.LabelArray 662 } 663 } 664 665 if len(deleted) > 0 && m.owner != nil { 666 m.owner.UpdateIdentities(nil, deleted) 667 } 668 669 m.restoredIdentities = nil // free memory 670 } 671 672 // Release is the reverse operation of AllocateIdentity() and releases the 673 // identity again. This function may result in kvstore operations. 674 // After the last user has released the ID, the returned lastUse value is true. 
675 func (m *CachingIdentityAllocator) Release(ctx context.Context, id *identity.Identity, notifyOwner bool) (released bool, err error) { 676 defer func() { 677 if released { 678 // decrement metrics, trigger checkpoint if local 679 metricVal := identity.ClusterLocalIdentityType 680 switch id.ID.Scope() { 681 case identity.IdentityScopeLocal: 682 metricVal = identity.NodeLocalIdentityType 683 case identity.IdentityScopeRemoteNode: 684 metricVal = identity.RemoteNodeIdentityType 685 } 686 if metricVal != identity.ClusterLocalIdentityType && m.checkpointTrigger != nil { 687 m.checkpointTrigger.Trigger() 688 } 689 for labelSource := range id.Labels.CollectSources() { 690 metrics.IdentityLabelSources.WithLabelValues(labelSource).Dec() 691 } 692 metrics.Identity.WithLabelValues(metricVal).Dec() 693 } 694 695 if m.owner != nil && released && notifyOwner { 696 deleted := identity.IdentityMap{ 697 id.ID: id.LabelArray, 698 } 699 m.owner.UpdateIdentities(nil, deleted) 700 } 701 }() 702 703 // Ignore reserved identities. 704 if id.IsReserved() { 705 return false, nil 706 } 707 708 switch identity.ScopeForLabels(id.Labels) { 709 case identity.IdentityScopeLocal: 710 return m.localIdentities.release(id, notifyOwner), nil 711 case identity.IdentityScopeRemoteNode: 712 return m.localNodeIdentities.release(id, notifyOwner), nil 713 } 714 715 // This will block until the kvstore can be accessed and all identities 716 // were successfully synced 717 err = m.WaitForInitialGlobalIdentities(ctx) 718 if err != nil { 719 return false, err 720 } 721 722 if m.IdentityAllocator == nil { 723 return false, fmt.Errorf("allocator not initialized") 724 } 725 726 // Rely on the eventual Kv-Store events for delete 727 // notifications of kv-store allocated identities. Even if an 728 // ID is no longer used locally, it may still be used by 729 // remote nodes, so we can't rely on the locally computed 730 // "lastUse". 
731 return m.IdentityAllocator.Release(ctx, &key.GlobalIdentity{LabelArray: id.LabelArray}) 732 } 733 734 // WatchRemoteIdentities returns a RemoteCache instance which can be later 735 // started to watch identities in another kvstore and sync them to the local 736 // identity cache. remoteName should be unique unless replacing an existing 737 // remote's backend. When cachedPrefix is set, identities are assumed to be 738 // stored under the "cilium/cache" prefix, and the watcher is adapted accordingly. 739 func (m *CachingIdentityAllocator) WatchRemoteIdentities(remoteName string, remoteID uint32, backend kvstore.BackendOperations, cachedPrefix bool) (*allocator.RemoteCache, error) { 740 <-m.globalIdentityAllocatorInitialized 741 742 prefix := m.identitiesPath 743 if cachedPrefix { 744 prefix = path.Join(kvstore.StateToCachePrefix(prefix), remoteName) 745 } 746 747 remoteAllocatorBackend, err := kvstoreallocator.NewKVStoreBackend(prefix, m.owner.GetNodeSuffix(), &key.GlobalIdentity{}, backend) 748 if err != nil { 749 return nil, fmt.Errorf("error setting up remote allocator backend: %w", err) 750 } 751 752 remoteAlloc, err := allocator.NewAllocator(&key.GlobalIdentity{}, remoteAllocatorBackend, 753 allocator.WithEvents(m.IdentityAllocator.GetEvents()), allocator.WithoutGC(), allocator.WithoutAutostart(), 754 allocator.WithCacheValidator(clusterIDValidator(remoteID)), 755 allocator.WithCacheValidator(clusterNameValidator(remoteName)), 756 ) 757 if err != nil { 758 return nil, fmt.Errorf("unable to initialize remote Identity Allocator: %w", err) 759 } 760 761 return m.IdentityAllocator.NewRemoteCache(remoteName, remoteAlloc), nil 762 } 763 764 func (m *CachingIdentityAllocator) RemoveRemoteIdentities(name string) { 765 if m.IdentityAllocator != nil { 766 m.IdentityAllocator.RemoveRemoteKVStore(name) 767 } 768 } 769 770 type IdentityChangeKind string 771 772 const ( 773 IdentityChangeSync IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeSync) 774 
IdentityChangeUpsert IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeUpsert) 775 IdentityChangeDelete IdentityChangeKind = IdentityChangeKind(allocator.AllocatorChangeDelete) 776 ) 777 778 type IdentityChange struct { 779 Kind IdentityChangeKind 780 ID identity.NumericIdentity 781 Labels labels.Labels 782 } 783 784 // Observe the identity changes. Conforms to stream.Observable. 785 // Replays the current state of the cache when subscribing. 786 func (m *CachingIdentityAllocator) Observe(ctx context.Context, next func(IdentityChange), complete func(error)) { 787 // This short-lived go routine serves the purpose of waiting for the global identity allocator becoming ready 788 // before starting to observe the underlying allocator for changes. 789 // m.IdentityAllocator is backed by a stream.FuncObservable, that will start its own 790 // go routine. Therefore, the current go routine will stop and free the lock on the setupMutex after the registration. 791 go func() { 792 if err := m.WaitForInitialGlobalIdentities(ctx); err != nil { 793 complete(ctx.Err()) 794 return 795 } 796 797 m.setupMutex.Lock() 798 defer m.setupMutex.Unlock() 799 800 if m.IdentityAllocator == nil { 801 complete(errors.New("allocator no longer initialized")) 802 return 803 } 804 805 // Observe the underlying allocator for changes and map the events to identities. 
806 stream.Map[allocator.AllocatorChange, IdentityChange]( 807 m.IdentityAllocator, 808 func(change allocator.AllocatorChange) IdentityChange { 809 return IdentityChange{ 810 Kind: IdentityChangeKind(change.Kind), 811 ID: identity.NumericIdentity(change.ID), 812 Labels: mapLabels(change.Key), 813 } 814 }, 815 ).Observe(ctx, next, complete) 816 }() 817 } 818 819 func mapLabels(allocatorKey allocator.AllocatorKey) labels.Labels { 820 var idLabels labels.Labels = nil 821 822 if allocatorKey != nil { 823 idLabels = labels.Labels{} 824 for k, v := range allocatorKey.GetAsMap() { 825 label := labels.ParseLabel(k + "=" + v) 826 idLabels[label.Key] = label 827 } 828 } 829 830 return idLabels 831 } 832 833 // clusterIDValidator returns a validator ensuring that the identity ID belongs 834 // to the ClusterID range. 835 func clusterIDValidator(clusterID uint32) allocator.CacheValidator { 836 min := idpool.ID(identity.GetMinimalAllocationIdentity(clusterID)) 837 max := idpool.ID(identity.GetMaximumAllocationIdentity(clusterID)) 838 839 return func(_ allocator.AllocatorChangeKind, id idpool.ID, _ allocator.AllocatorKey) error { 840 if id < min || id > max { 841 return fmt.Errorf("ID %d does not belong to the allocation range of cluster ID %d", id, clusterID) 842 } 843 return nil 844 } 845 } 846 847 // clusterNameValidator returns a validator ensuring that the identity labels 848 // include the one specifying the correct cluster name. 849 func clusterNameValidator(clusterName string) allocator.CacheValidator { 850 return func(kind allocator.AllocatorChangeKind, _ idpool.ID, ak allocator.AllocatorKey) error { 851 if kind != allocator.AllocatorChangeUpsert { 852 // Don't filter out deletion events, as labels may not be propagated, 853 // and to prevent leaving stale identities behind. 
854 return nil 855 } 856 857 gi, ok := ak.(*key.GlobalIdentity) 858 if !ok { 859 return fmt.Errorf("unsupported key type %T", ak) 860 } 861 862 var found bool 863 for _, lbl := range gi.LabelArray { 864 if lbl.Key != api.PolicyLabelCluster { 865 continue 866 } 867 868 switch { 869 case lbl.Source != labels.LabelSourceK8s: 870 return fmt.Errorf("unexpected source for cluster label: got %s, expected %s", lbl.Source, labels.LabelSourceK8s) 871 case lbl.Value != clusterName: 872 return fmt.Errorf("unexpected cluster name: got %s, expected %s", lbl.Value, clusterName) 873 default: 874 found = true 875 } 876 } 877 878 if !found { 879 return fmt.Errorf("could not find expected label %s", api.PolicyLabelCluster) 880 } 881 882 return nil 883 } 884 }