github.com/cilium/cilium@v1.16.2/pkg/endpoint/policy.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package endpoint

import (
	"context"
	"errors"
	"fmt"
	"net/netip"
	"os"
	"path"
	"strconv"
	"strings"

	"github.com/cilium/ebpf"
	"github.com/sirupsen/logrus"

	"github.com/cilium/cilium/pkg/completion"
	"github.com/cilium/cilium/pkg/controller"
	dptypes "github.com/cilium/cilium/pkg/datapath/types"
	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
	"github.com/cilium/cilium/pkg/endpoint/regeneration"
	"github.com/cilium/cilium/pkg/eventqueue"
	identityPkg "github.com/cilium/cilium/pkg/identity"
	"github.com/cilium/cilium/pkg/identity/identitymanager"
	"github.com/cilium/cilium/pkg/ip"
	"github.com/cilium/cilium/pkg/ipcache"
	"github.com/cilium/cilium/pkg/labels"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
	"github.com/cilium/cilium/pkg/node"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/policy"
	policyTypes "github.com/cilium/cilium/pkg/policy/types"
	"github.com/cilium/cilium/pkg/revert"
	"github.com/cilium/cilium/pkg/time"
	"github.com/cilium/cilium/pkg/types"
	"github.com/cilium/cilium/pkg/u8proto"
)

var (
	endpointRegenerationRecoveryControllerGroup = controller.NewGroup("endpoint-regeneration-recovery")
	syncAddressIdentityMappingControllerGroup   = controller.NewGroup("sync-address-identity-mapping")
)

// HasBPFPolicyMap returns true if policy map changes should be collected.
// Deprecated: use (e *Endpoint).IsProperty(PropertySkipBPFPolicy)
func (e *Endpoint) HasBPFPolicyMap() bool {
	return !e.IsProperty(PropertySkipBPFPolicy)
}

// GetNamedPort returns the port for the given name.
// Must be called with e.mutex NOT held.
func (e *Endpoint) GetNamedPort(ingress bool, name string, proto uint8) uint16 {
	if ingress {
		// Ingress only needs the ports of the POD itself
		return e.getNamedPortIngress(e.GetK8sPorts(), name, proto)
	}
	// egress needs named ports of all the pods
	return e.getNamedPortEgress(e.namedPortsGetter.GetNamedPorts(), name, proto)
}

func (e *Endpoint) getNamedPortIngress(npMap types.NamedPortMap, name string, proto uint8) uint16 {
	port, err := npMap.GetNamedPort(name, proto)
	if err != nil && e.logLimiter.Allow() {
		e.getLogger().WithFields(logrus.Fields{
			logfields.PortName:         name,
			logfields.Protocol:         u8proto.U8proto(proto).String(),
			logfields.TrafficDirection: "ingress",
		}).WithError(err).Warning("Skipping named port")
	}
	return port
}

func (e *Endpoint) getNamedPortEgress(npMap types.NamedPortMultiMap, name string, proto uint8) uint16 {
	port, err := npMap.GetNamedPort(name, proto)
	// Skip logging for ErrUnknownNamedPort on egress, as the destination POD with the port name
	// is likely not scheduled yet.
	if err != nil && !errors.Is(err, types.ErrUnknownNamedPort) && e.logLimiter.Allow() {
		e.getLogger().WithFields(logrus.Fields{
			logfields.PortName:         name,
			logfields.Protocol:         u8proto.U8proto(proto).String(),
			logfields.TrafficDirection: "egress",
		}).WithError(err).Warning("Skipping named port")
	}
	return port
}
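
// exampleResolveNamedPort is an illustrative sketch (the function name and the
// port name "http-metrics" are hypothetical) showing how ingress and egress
// named-port resolution differ: ingress consults only the endpoint's own pod
// ports, while egress consults the named ports of all pods known to the agent.
func exampleResolveNamedPort(e *Endpoint) (ingress, egress uint16) {
	const tcp = uint8(6) // u8proto value for TCP
	ingress = e.GetNamedPort(true, "http-metrics", tcp)
	egress = e.GetNamedPort(false, "http-metrics", tcp)
	// A returned port of 0 means the named port could not be resolved yet.
	return ingress, egress
}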

// proxyID returns a unique string to identify a proxy mapping,
// and the resolved destination port number, if any.
// For port ranges the proxy is identified by the first port in
// the range, as overlapping proxy port ranges are not supported.
// Must be called with e.mutex held.
func (e *Endpoint) proxyID(l4 *policy.L4Filter, listener string) (string, uint16, uint8) {
	port := l4.Port
	protocol := uint8(l4.U8Proto)
	// Calculate protocol if it is 0 (default) and
	// is not "ANY" (that is, it was not calculated).
	if protocol == 0 && !l4.Protocol.IsAny() {
		proto, _ := u8proto.ParseProtocol(string(l4.Protocol))
		protocol = uint8(proto)
	}
	if port == 0 && l4.PortName != "" {
		port = e.GetNamedPort(l4.Ingress, l4.PortName, protocol)
		if port == 0 {
			return "", 0, 0
		}
	}

	return policy.ProxyID(e.ID, l4.Ingress, string(l4.Protocol), port, listener), port, protocol
}

var unrealizedRedirect = errors.New("Proxy port for redirect not found")

// LookupRedirectPort returns the redirect L4 proxy port for the given input parameters.
// Returns 0 if not found or the filter doesn't require a redirect.
// Returns an error if the redirect port cannot be found.
func (e *Endpoint) LookupRedirectPort(ingress bool, protocol string, port uint16, listener string) (uint16, error) {
	redirects := e.GetRealizedRedirects()
	proxyPort, exists := redirects[policy.ProxyID(e.ID, ingress, protocol, port, listener)]
	if !exists {
		return 0, unrealizedRedirect
	}
	return proxyPort, nil
}
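
// exampleLookupRedirect is an illustrative sketch (the function name and the
// ingress/TCP/8080 parameters are hypothetical) of how a caller might map an
// L4 flow back to its realized proxy redirect port, distinguishing "no
// redirect realized yet" from a usable proxy port.
func exampleLookupRedirect(e *Endpoint) (uint16, bool) {
	proxyPort, err := e.LookupRedirectPort(true, "TCP", 8080, "")
	if err != nil {
		// Either the filter has no redirect, or the proxy port has not been
		// realized yet (unrealizedRedirect).
		return 0, false
	}
	return proxyPort, true
}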

// Note that this function assumes that endpoint policy has already been generated!
// Must be called with endpoint.mutex held for reading.
func (e *Endpoint) updateNetworkPolicy(proxyWaitGroup *completion.WaitGroup) (reterr error, revertFunc revert.RevertFunc) {
	// Skip updating the NetworkPolicy if no identity has been computed for this
	// endpoint.
	if e.SecurityIdentity == nil {
		return nil, nil
	}

	// If desired policy is nil then no policy change is needed.
	if e.desiredPolicy == nil {
		return nil, nil
	}

	if e.IsProxyDisabled() {
		return nil, nil
	}

	// Publish the updated policy to L7 proxies.
	return e.proxy.UpdateNetworkPolicy(e, e.visibilityPolicy, &e.desiredPolicy.L4Policy, e.desiredPolicy.IngressPolicyEnabled, e.desiredPolicy.EgressPolicyEnabled, proxyWaitGroup)
}

// setNextPolicyRevision updates the desired policy revision field.
// Must be called with the endpoint lock held for at least reading.
func (e *Endpoint) setNextPolicyRevision(revision uint64) {
	e.nextPolicyRevision = revision
	e.UpdateLogger(map[string]interface{}{
		logfields.DesiredPolicyRevision: e.nextPolicyRevision,
	})
}

type policyGenerateResult struct {
	policyRevision   uint64
	selectorPolicy   policy.SelectorPolicy
	endpointPolicy   *policy.EndpointPolicy
	identityRevision int
}

// regeneratePolicy computes the policy for the given endpoint based off of the
// rules in regeneration.Owner's policy repository.
//
// Policy generation may fail, and in that case we exit before actually changing
// the policy in any way, so that the last policy remains fully in effect if the
// new policy can not be implemented. This is done on a per-endpoint basis,
// however, and it is possible that policy update succeeds for some endpoints,
// while it fails for other endpoints.
//
// Failure may be due to any error in obtaining information for computing policy,
// or if policy could not be generated given the current set of rules in the repository.
//
// endpoint lock must NOT be held. This is because the ipcache needs to be able to
// make progress while generating policy, and *that* needs the endpoint unlocked to call
// ep.ApplyPolicyMapChanges. Specifically, computing policy may cause identity allocation
// which requires ipcache progress.
//
// buildMutex MUST be held, and not released until setDesiredPolicy and
// updateRealizedState have been called.
//
// There are a few fields that depend on this exact configuration of locking:
//   - ep.desiredPolicy: ep.mutex must be locked between writing this and committing to
//     the policy maps, or else policy drops may occur
//   - ep.policyRevision: ep.mutex and ep.buildMutex must be held to write to this
//   - ep.selectorPolicy: this may be nulled if the endpoint's identity changes; we must
//     check for this when committing. ep.mutex must be held
//   - ep.realizedRedirects: this is read by external callers as part of policy generation,
//     so ep.mutex must not be required to read this. Instead, both ep.mutex and ep.buildMutex
//     must be held to write to this (i.e. we are deep in regeneration)
//
// Returns a result that should be passed to setDesiredPolicy after the endpoint's
// write lock has been acquired, or err if recomputing policy failed.
func (e *Endpoint) regeneratePolicy(stats *regenerationStatistics) (*policyGenerateResult, error) {
	var err error

	// lock the endpoint, read our values, then unlock
	err = e.rlockAlive()
	if err != nil {
		return nil, err
	}

	// No point in calculating policy if endpoint does not have an identity yet.
	if e.SecurityIdentity == nil {
		e.getLogger().Warn("Endpoint lacks identity, skipping policy calculation")
		e.runlock()
		return nil, nil
	}

	// Copy out some values we care about, then unlock
	forcePolicyCompute := e.forcePolicyCompute
	securityIdentity := e.SecurityIdentity

	// We are computing policy; set this to false.
	// We do this now, not in setDesiredPolicy(), because if another caller
	// comes in and forces computation, we should leave that for the *next*
	// regeneration.
	e.forcePolicyCompute = false

	result := &policyGenerateResult{
		selectorPolicy:   e.selectorPolicy,
		endpointPolicy:   e.desiredPolicy,
		identityRevision: e.identityRevision,
	}
	e.runlock()

	e.getLogger().Debug("Starting policy recalculation...")

	stats.waitingForPolicyRepository.Start()
	repo := e.policyGetter.GetPolicyRepository()
	repo.Mutex.RLock() // Be sure to release this lock!
	stats.waitingForPolicyRepository.End(true)

	result.policyRevision = repo.GetRevision()

	// Recompute policy for this endpoint only if not already done for this revision
	// and identity.
	if e.nextPolicyRevision >= result.policyRevision &&
		e.desiredPolicy != nil && result.selectorPolicy != nil {

		if !forcePolicyCompute {
			if logger := e.getLogger(); logging.CanLogAt(logger.Logger, logrus.DebugLevel) {
				e.getLogger().WithFields(logrus.Fields{
					"policyRevision.next": e.nextPolicyRevision,
					"policyRevision.repo": result.policyRevision,
					"policyChanged":       e.nextPolicyRevision > e.policyRevision,
				}).Debug("Skipping unnecessary endpoint policy recalculation")
			}
			repo.Mutex.RUnlock()
			return result, nil
		} else {
			e.getLogger().Debug("Forced policy recalculation")
		}
	}

	stats.policyCalculation.Start()
	defer func() { stats.policyCalculation.End(err == nil) }()
	if result.selectorPolicy == nil {
		// Upon initial insertion or restore, there's currently no good
		// trigger point to ensure that the security Identity is
		// assigned after the endpoint is added to the endpointmanager
		// (and hence also the identitymanager). In that case, detect
		// that the selectorPolicy is not set and find it.
		result.selectorPolicy = repo.GetPolicyCache().Lookup(securityIdentity)
		if result.selectorPolicy == nil {
			err := fmt.Errorf("no cached selectorPolicy found")
			e.getLogger().WithError(err).Warning("Failed to regenerate from cached policy")
			repo.Mutex.RUnlock()
			return result, err
		}
	}

	// UpdatePolicy ensures the SelectorPolicy is fully resolved.
	// Endpoint lock must not be held!
	// TODO: GH-7515: Consider ways to compute policy outside of the
	// endpoint regeneration process, ideally as part of the policy change
	// handler.
	err = repo.GetPolicyCache().UpdatePolicy(securityIdentity)
	if err != nil {
		e.getLogger().WithError(err).Warning("Failed to update policy")
		repo.Mutex.RUnlock()
		return nil, err
	}
	repo.Mutex.RUnlock() // Done with policy repository; release this now as Consume() can be slow

	// Consume converts a SelectorPolicy into an EndpointPolicy.
	result.endpointPolicy = result.selectorPolicy.Consume(e)
	return result, nil
}
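
// exampleRegenerateAndCommit is an illustrative sketch (the function name is
// hypothetical and error handling is abbreviated) of the locking order that
// regeneratePolicy and setDesiredPolicy expect from their caller: the caller
// holds buildMutex throughout, computes policy without the endpoint lock so
// that the ipcache can make progress, then takes the endpoint write lock
// before committing the result.
func exampleRegenerateAndCommit(e *Endpoint, stats *regenerationStatistics) error {
	res, err := e.regeneratePolicy(stats)
	if err != nil {
		return err
	}
	if err := e.lockAlive(); err != nil {
		return err
	}
	defer e.unlock()
	// setDesiredPolicy rejects the result if the endpoint's identity changed
	// while policy was being computed.
	return e.setDesiredPolicy(res)
}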

// setDesiredPolicy updates the endpoint with the results of a policy calculation.
//
// The endpoint write lock must be held and not released until the desired policy has
// been pushed into the policymaps via `syncPolicyMap`. This is so that we block
// ApplyPolicyMapChanges, which has the effect of blocking the ipcache from updating
// the ipcache bpf map. It is required that any pending changes are pushed into
// the policymap before the ipcache map, otherwise endpoints could experience transient
// policy drops.
//
// Specifically, since policy is calculated asynchronously from the ipcache's apply loop,
// it is probable that the new policy diverges from the bpf PolicyMap. So, we cannot safely
// consume incremental changes (and thus allow the ipcache to continue) until we have
// successfully performed a full sync with the endpoint's PolicyMap. Otherwise,
// the ipcache may remove an identity from the ipcache that the bpf PolicyMap is still
// relying on.
func (e *Endpoint) setDesiredPolicy(res *policyGenerateResult) error {
	// nil result means endpoint had no identity while policy was calculated
	if res == nil {
		if e.SecurityIdentity != nil {
			e.getLogger().Info("Endpoint SecurityIdentity changed during policy regeneration")
			return fmt.Errorf("endpoint %d SecurityIdentity changed during policy regeneration", e.ID)
		}

		return nil
	}
	// if the security identity changed, reject the policy computation
	if e.identityRevision != res.identityRevision {
		e.getLogger().Info("Endpoint SecurityIdentity changed during policy regeneration")
		return fmt.Errorf("endpoint %d SecurityIdentity changed during policy regeneration", e.ID)
	}

	// Set the revision of this endpoint to the current revision of the policy
	// repository.
	e.setNextPolicyRevision(res.policyRevision)
	e.selectorPolicy = res.selectorPolicy
	e.desiredPolicy = res.endpointPolicy

	return nil
}

// updateAndOverrideEndpointOptions updates the boolean configuration options for the endpoint
// based off of policy configuration, daemon policy enforcement mode, and any
// configuration options provided in opts. Returns whether the options changed
// from prior endpoint configuration. Note that the policy which applies
// to the endpoint, as well as the daemon's policy enforcement, may override
// configuration changes which were made via the API that were provided in opts.
// Must be called with endpoint mutex held.
func (e *Endpoint) updateAndOverrideEndpointOptions(opts option.OptionMap) (optsChanged bool) {
	if opts == nil {
		opts = make(option.OptionMap)
	}

	optsChanged = e.applyOptsLocked(opts)
	return
}

// Called with e.mutex UNlocked.
func (e *Endpoint) regenerate(ctx *regenerationContext) (retErr error) {
	var revision uint64
	var err error

	ctx.Stats = regenerationStatistics{}
	stats := &ctx.Stats
	stats.totalTime.Start()
	debugLogsEnabled := logging.CanLogAt(e.getLogger().Logger, logrus.DebugLevel)

	if debugLogsEnabled {
		e.getLogger().WithFields(logrus.Fields{
			logfields.StartTime: time.Now(),
			logfields.Reason:    ctx.Reason,
		}).Debug("Regenerating endpoint")
	}

	defer func() {
		// This has to be within a func(), not deferred directly, so that the
		// value of retErr is passed in from when regenerate returns.
		e.updateRegenerationStatistics(ctx, retErr)
	}()

	e.buildMutex.Lock()
	defer e.buildMutex.Unlock()

	stats.waitingForLock.Start()
	// Check if the endpoint is still alive before doing any build.
	err = e.lockAlive()
	stats.waitingForLock.End(err == nil)
	if err != nil {
		return err
	}

	// When building the initial drop policy in waiting-for-identity state
	// the state remains unchanged.
	//
	// GH-5350: Remove this special case to require checking for StateWaitingForIdentity
	if e.getState() != StateWaitingForIdentity &&
		!e.BuilderSetStateLocked(StateRegenerating, "Regenerating endpoint: "+ctx.Reason) {
		if debugLogsEnabled {
			e.getLogger().WithField(logfields.EndpointState, e.state).Debug("Skipping build due to invalid state")
		}
		e.unlock()

		return fmt.Errorf("Skipping build due to invalid state: %s", e.state)
	}

	// Bump priority if a higher-priority event was skipped.
	// This must be done in the same critical section as the state transition above.
	if e.skippedRegenerationLevel > ctx.datapathRegenerationContext.regenerationLevel {
		ctx.datapathRegenerationContext.regenerationLevel = e.skippedRegenerationLevel
	}
	// reset to the default lowest level
	e.skippedRegenerationLevel = regeneration.Invalid

	e.unlock()

	stats.prepareBuild.Start()
	origDir := e.StateDirectoryPath()
	ctx.datapathRegenerationContext.currentDir = origDir

	// This is the temporary directory to store the generated headers,
	// the original existing directory is not overwritten until the
	// entire generation process has succeeded.
	tmpDir := e.NextDirectoryPath()
	ctx.datapathRegenerationContext.nextDir = tmpDir

	// Remove any existing temporary directory that has been left over,
	// to make sure we can start the build from scratch.
	if err := e.removeDirectory(tmpDir); err != nil && !os.IsNotExist(err) {
		stats.prepareBuild.End(false)
		return fmt.Errorf("unable to remove old temporary directory: %w", err)
	}

	// Create temporary endpoint directory if it does not exist yet
	if err := os.MkdirAll(tmpDir, 0777); err != nil {
		stats.prepareBuild.End(false)
		return fmt.Errorf("Failed to create endpoint directory: %w", err)
	}

	stats.prepareBuild.End(true)

	defer func() {
		if err := e.lockAlive(); err != nil {
			if retErr == nil {
				retErr = err
			} else {
				e.logDisconnectedMutexAction(err, "after regenerate")
			}
			return
		}

		// Guarantee removal of the temporary directory regardless of the
		// outcome of the build. If the build was successful, the temporary
		// directory will have been moved to a new permanent location. If the
		// build failed, the temporary directory will still exist and we will
		// remove it.
		e.removeDirectory(tmpDir)

		// Set to Ready, but only if no other changes are pending.
		// State will remain as waiting-to-regenerate if further
		// changes are needed. There should be another regeneration
		// queued to take care of it.
		e.BuilderSetStateLocked(StateReady, "Completed endpoint regeneration with no pending regeneration requests")
		e.unlock()
	}()

	revision, err = e.regenerateBPF(ctx)

	// Write full verifier log to the endpoint directory.
	var ve *ebpf.VerifierError
	if errors.As(err, &ve) {
		p := path.Join(tmpDir, "verifier.log")
		f, err := os.Create(p)
		if err != nil {
			return fmt.Errorf("creating endpoint verifier log file: %w", err)
		}
		defer f.Close()
		if _, err := fmt.Fprintf(f, "%+v\n", ve); err != nil {
			return fmt.Errorf("writing verifier log to endpoint directory: %w", err)
		}
		e.getLogger().WithFields(logrus.Fields{logfields.Path: p}).
			Info("Wrote verifier log to endpoint directory")
	}

	if err != nil {
		failDir := e.FailedDirectoryPath()
		if !errors.Is(err, context.Canceled) {
			e.getLogger().WithError(err).WithFields(logrus.Fields{logfields.Path: failDir}).
				Info("generating BPF for endpoint failed, keeping stale directory")
		}

		// Remove any existing previous failure directory.
		e.removeDirectory(failDir)
		os.Rename(tmpDir, failDir)
		return err
	}

	return e.updateRealizedState(stats, origDir, revision)
}
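
// exampleReadVerifierLog is an illustrative sketch (the function name is
// hypothetical) of where the verifier log written above ends up after a
// failed build: the temporary directory, including "verifier.log", is renamed
// to the endpoint's failed-state directory.
func exampleReadVerifierLog(e *Endpoint) (string, error) {
	b, err := os.ReadFile(path.Join(e.FailedDirectoryPath(), "verifier.log"))
	if err != nil {
		return "", err
	}
	return string(b), nil
}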

// updateRealizedState sets any realized state fields within the endpoint to
// be the desired state of the endpoint. This is only called after a successful
// regeneration of the endpoint.
func (e *Endpoint) updateRealizedState(stats *regenerationStatistics, origDir string, revision uint64) error {
	// Update desired policy for endpoint because policy has now been realized
	// in the datapath. PolicyMap state is not updated here, because that is
	// performed in endpoint.syncPolicyMap().
	stats.waitingForLock.Start()
	err := e.lockAlive()
	stats.waitingForLock.End(err == nil)
	if err != nil {
		return err
	}

	defer e.unlock()

	// Depending upon result of BPF regeneration (compilation executed),
	// shift endpoint directories to match said BPF regeneration
	// results.
	err = e.synchronizeDirectories(origDir)
	if err != nil {
		return fmt.Errorf("error synchronizing endpoint BPF program directories: %w", err)
	}

	// Start periodic background full reconciliation of the policy map.
	// Does nothing if it has already been started.
	if !e.isProperty(PropertyFakeEndpoint) {
		e.startSyncPolicyMapController()
	}

	if e.desiredPolicy != e.realizedPolicy {
		// Remove references to the old policy
		e.realizedPolicy.Detach()
		// Set realized state to desired state.
		e.realizedPolicy = e.desiredPolicy
	}

	// Mark the endpoint to be running the policy revision it was
	// compiled for.
	e.setPolicyRevision(revision)

	// Remove restored rules after successful regeneration.
	e.owner.RemoveRestoredDNSRules(e.ID)

	return nil
}

func (e *Endpoint) updateRegenerationStatistics(ctx *regenerationContext, err error) {
	success := err == nil
	stats := &ctx.Stats

	stats.totalTime.End(success)
	stats.success = success

	e.mutex.RLock()
	stats.endpointID = e.ID
	stats.policyStatus = e.policyStatus()
	e.runlock()
	stats.SendMetrics()

	// Only add fields to the scoped logger if the criteria for logging a message are met, to avoid
	// the expensive call to 'WithFields'.
	scopedLog := e.getLogger()
	if err != nil || logging.CanLogAt(scopedLog.Logger, logrus.DebugLevel) {
		fields := logrus.Fields{
			logfields.Reason: ctx.Reason,
		}
		for field, stat := range stats.GetMap() {
			fields[field] = stat.Total()
		}
		for field, stat := range stats.datapathRealization.GetMap() {
			fields[field] = stat.Total()
		}
		scopedLog = scopedLog.WithFields(fields)
	}

	if err != nil {
		if !errors.Is(err, context.Canceled) {
			scopedLog.WithError(err).Warn("Regeneration of endpoint failed")
		}
		e.LogStatus(BPF, Failure, "Error regenerating endpoint: "+err.Error())
		return
	}

	scopedLog.Debug("Completed endpoint regeneration")
	e.LogStatusOK(BPF, "Successfully regenerated endpoint program (Reason: "+ctx.Reason+")")
}

// SetRegenerateStateIfAlive tries to change the state of the endpoint for pending regeneration.
// Returns 'true' if 'e.Regenerate()' should be called after releasing the endpoint lock.
// Returns 'false' if the returned error is non-nil.
func (e *Endpoint) SetRegenerateStateIfAlive(regenMetadata *regeneration.ExternalRegenerationMetadata) (bool, error) {
	regen := false
	err := e.lockAlive()
	if err != nil {
		e.LogStatus(Policy, Failure, "Error while handling policy updates for endpoint: "+err.Error())
	} else {
		regen = e.setRegenerateStateLocked(regenMetadata)
		e.unlock()
	}
	return regen, err
}
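
// exampleRegenerateAndWait is an illustrative sketch (the function name and
// reason string are hypothetical) of how callers typically consume the result
// channel from RegenerateIfAlive, defined below: a received value reports
// build success or failure, while a close without a value means that no
// regeneration was queued.
func exampleRegenerateAndWait(e *Endpoint) bool {
	success, regenerated := <-e.RegenerateIfAlive(&regeneration.ExternalRegenerationMetadata{
		Reason:            "example: endpoint labels changed",
		RegenerationLevel: regeneration.RegenerateWithDatapath,
	})
	// 'regenerated' is false when the channel was closed without a value,
	// i.e. the regeneration did not happen.
	return success && regenerated
}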

// setRegenerateStateLocked tries to change the state of the endpoint for pending regeneration.
// Returns 'true' if 'e.Regenerate()' should be called after releasing the endpoint lock.
func (e *Endpoint) setRegenerateStateLocked(regenMetadata *regeneration.ExternalRegenerationMetadata) bool {
	var regen bool
	state := e.getState()
	switch state {
	case StateRestoring, StateWaitingToRegenerate:
		// Bump the skipped regeneration level if needed so that the existing/queued
		// regeneration can regenerate on the required level.
		if regenMetadata.RegenerationLevel > e.skippedRegenerationLevel {
			e.skippedRegenerationLevel = regenMetadata.RegenerationLevel
			e.logStatusLocked(Other, OK, fmt.Sprintf("Skipped duplicate endpoint regeneration level %s trigger due to %s", regenMetadata.RegenerationLevel.String(), regenMetadata.Reason))
		} else {
			e.logStatusLocked(Other, OK, fmt.Sprintf("Skipped duplicate endpoint regeneration trigger due to %s", regenMetadata.Reason))
		}
		regen = false
	default:
		regen = e.setState(StateWaitingToRegenerate, fmt.Sprintf("Triggering endpoint regeneration due to %s", regenMetadata.Reason))
	}
	return regen
}

// RegenerateIfAlive queues a regeneration of this endpoint into the build queue
// of the endpoint and returns a channel that is closed when the regeneration of
// the endpoint is complete. The channel returns:
//   - false if the regeneration failed
//   - true if the regeneration succeeded
//   - nothing, and the channel is closed, if the regeneration did not happen
func (e *Endpoint) RegenerateIfAlive(regenMetadata *regeneration.ExternalRegenerationMetadata) <-chan bool {
	regen, err := e.SetRegenerateStateIfAlive(regenMetadata)
	if err != nil {
		log.WithError(err).Debugf("Endpoint disappeared while queued to be regenerated: %s", regenMetadata.Reason)
	}
	if regen {
		// Regenerate logs status according to the build success/failure
		return e.Regenerate(regenMetadata)
	}

	ch := make(chan bool)
	close(ch)
	return ch
}

// Regenerate forces the regeneration of endpoint programs & policy.
// Should only be called with e.state at StateWaitingToRegenerate,
// StateWaitingForIdentity, or StateRestoring.
func (e *Endpoint) Regenerate(regenMetadata *regeneration.ExternalRegenerationMetadata) <-chan bool {
	hr := e.GetReporter("datapath-regenerate")
	done := make(chan bool, 1)

	var (
		ctx   context.Context
		cFunc context.CancelFunc
	)

	if regenMetadata.ParentContext != nil {
		ctx, cFunc = context.WithCancel(regenMetadata.ParentContext)
	} else {
		ctx, cFunc = context.WithCancel(e.aliveCtx)
	}

	regenContext := ParseExternalRegenerationMetadata(ctx, cFunc, regenMetadata)

	epEvent := eventqueue.NewEvent(&EndpointRegenerationEvent{
		regenContext: regenContext,
		ep:           e,
	})

	// This may block if the Endpoint's EventQueue is full. This has to be done
	// synchronously as some callers depend on the fact that the event is
	// synchronously enqueued.
	resChan, err := e.eventQueue.Enqueue(epEvent)
	if err != nil {
		e.getLogger().WithError(err).Error("Enqueue of EndpointRegenerationEvent failed")
		done <- false
		close(done)
		return done
	}

	go func() {
		// Free up resources with context.
		defer cFunc()

		var (
			buildSuccess bool
			regenError   error
			canceled     bool
		)

		result, ok := <-resChan
		if ok {
			regenResult := result.(*EndpointRegenerationResult)
			regenError = regenResult.err
			buildSuccess = regenError == nil

			if regenError != nil && !errors.Is(regenError, context.Canceled) {
				e.getLogger().WithError(regenError).Error("endpoint regeneration failed")
				hr.Degraded("Endpoint regeneration failed", regenError)
			} else {
				hr.OK("Endpoint regeneration successful")
			}
		} else {
			// This may be unnecessary(?) since 'closing' of the results
			// channel means that event has been cancelled?
			e.getLogger().Debug("regeneration was cancelled")
			canceled = true
		}

		// If a build is canceled, that means that the Endpoint is being deleted,
		// not that the build failed.
		if !buildSuccess && !canceled {
			select {
			case e.regenFailedChan <- struct{}{}:
			default:
				// If we can't write to the channel, that means that it is
				// full / a regeneration will occur - we don't have to
				// do anything.
			}
		}
		done <- buildSuccess
		close(done)
	}()

	return done
}

var reasonRegenRetry = "retrying regeneration"

// startRegenerationFailureHandler waits for a build of the Endpoint to fail.
// Terminates when the given Endpoint is deleted.
// If a build fails, the controller tries to regenerate the
// Endpoint until it succeeds. Once the controller succeeds, it will not be
// run again unless another build failure occurs. If the call to `Regenerate`
// fails inside of the controller, the controller retries it with an
// exponential backoff (see ErrorRetryBaseDuration below).
func (e *Endpoint) startRegenerationFailureHandler() {
	e.controllers.UpdateController(fmt.Sprintf("endpoint-%s-regeneration-recovery", e.StringID()), controller.ControllerParams{
		Group: endpointRegenerationRecoveryControllerGroup,
		DoFunc: func(ctx context.Context) error {
			select {
			case <-e.regenFailedChan:
				e.getLogger().Debug("received signal that regeneration failed")
			case <-ctx.Done():
				e.getLogger().Debug("exiting retrying regeneration goroutine due to endpoint being deleted")
				return nil
			}

			regenMetadata := &regeneration.ExternalRegenerationMetadata{
				// TODO (ianvernon) - is there a way we can plumb a parent
				// context to a controller (e.g., endpoint.aliveCtx)?
				ParentContext: ctx,
				Reason:        reasonRegenRetry,
				// Completely rewrite the endpoint - we don't know the nature
				// of the failure, simply that something failed.
				RegenerationLevel: regeneration.RegenerateWithDatapath,
			}
			regen, _ := e.SetRegenerateStateIfAlive(regenMetadata)
			if !regen {
				// We don't need to regenerate because the endpoint is
				// disconnecting / is disconnected, or another regeneration has
				// already been enqueued. Exit gracefully.
				return nil
			}

			if success := <-e.Regenerate(regenMetadata); success {
				return nil
			}
			return fmt.Errorf("regeneration recovery failed")
		},
		ErrorRetryBaseDuration: 2 * time.Second,
		Context:                e.aliveCtx,
	})
}

func (e *Endpoint) notifyEndpointRegeneration(err error) {
	reprerr := e.owner.SendNotification(monitorAPI.EndpointRegenMessage(e, err))
	if reprerr != nil {
		e.getLogger().WithError(reprerr).Warn("Notifying monitor about endpoint regeneration failed")
	}
}

// FormatGlobalEndpointID returns the global ID of the endpoint in the format
// <global ID prefix>:<cluster name>:<node name>:<endpoint ID> as a string.
func (e *Endpoint) FormatGlobalEndpointID() string {
	localNodeName := nodeTypes.GetName()
	metadata := []string{endpointid.CiliumGlobalIdPrefix.String(), ipcache.AddressSpace, localNodeName, strconv.Itoa(int(e.ID))}
	return strings.Join(metadata, ":")
}

// This synchronizes the key-value store with a mapping of the endpoint's IP
// with the numerical ID representing its security identity.
func (e *Endpoint) runIPIdentitySync(endpointIP netip.Addr) {
	if option.Config.KVStore == "" || !endpointIP.IsValid() || option.Config.JoinCluster {
		return
	}

	addressFamily := "IPv4"
	if endpointIP.Is6() {
		addressFamily = "IPv6"
	}

	e.controllers.UpdateController(
		fmt.Sprintf("sync-%s-identity-mapping (%d)", addressFamily, e.ID),
		controller.ControllerParams{
			Group: syncAddressIdentityMappingControllerGroup,
			DoFunc: func(ctx context.Context) error {
				if err := e.rlockAlive(); err != nil {
					return controller.NewExitReason("Endpoint disappeared")
				}

				if e.SecurityIdentity == nil {
					e.runlock()
					return nil
				}

				ID := e.SecurityIdentity.ID
				hostIP, ok := ip.AddrFromIP(node.GetIPv4())
				if !ok {
					return controller.NewExitReason("Failed to convert node IPv4 address")
				}
				key := node.GetEndpointEncryptKeyIndex()
				metadata := e.FormatGlobalEndpointID()
				k8sNamespace := e.K8sNamespace
				k8sPodName := e.K8sPodName

				// Release lock as we do not want to have long-lasting key-value
				// store operations resulting in lock being held for a long time.
				e.runlock()

				if err := ipcache.UpsertIPToKVStore(ctx, endpointIP, hostIP, ID, key, metadata, k8sNamespace, k8sPodName, e.GetK8sPorts()); err != nil {
					return fmt.Errorf("unable to add endpoint IP mapping '%s'->'%d': %w", endpointIP.String(), ID, err)
				}
				return nil
			},
			StopFunc: func(ctx context.Context) error {
				ip := endpointIP.String()
				if err := ipcache.DeleteIPFromKVStore(ctx, ip); err != nil {
					return fmt.Errorf("unable to delete endpoint IP '%s' from ipcache: %w", ip, err)
				}
				return nil
			},
			RunInterval: 5 * time.Minute,
			Context:     e.aliveCtx,
		},
	)
}

// SetIdentity resets the endpoint's policy identity to 'id'.
// Caller triggers policy regeneration if needed.
// Called with e.mutex Lock()ed
func (e *Endpoint) SetIdentity(identity *identityPkg.Identity, newEndpoint bool) {
	oldIdentity := "no identity"
	if e.SecurityIdentity != nil {
		oldIdentity = e.SecurityIdentity.StringID()
	}

	// Current security identity for endpoint is its old identity - delete its
	// reference from the global identity manager, and add a reference to the new
	// identity for the endpoint.
	if newEndpoint {
		// TODO - GH-9354.
		identitymanager.Add(identity)
	} else {
		identitymanager.RemoveOldAddNew(e.SecurityIdentity, identity)
	}
	e.SecurityIdentity = identity
	e.replaceIdentityLabels(labels.LabelSourceAny, identity.Labels)

	// Clear selectorPolicy. It will be determined at next regeneration.
	e.selectorPolicy = nil

	// Sets endpoint state to ready if it was waiting for identity.
	if e.getState() == StateWaitingForIdentity {
		e.setState(StateReady, "Set identity for this endpoint")
	}

	// Whenever the identity is updated, propagate the change to the key-value store
	// of IP to identity mapping.
	e.runIPIdentitySync(e.IPv4)
	e.runIPIdentitySync(e.IPv6)

	if oldIdentity != identity.StringID() {
		e.getLogger().WithFields(logrus.Fields{
			logfields.Identity:       identity.StringID(),
			logfields.OldIdentity:    oldIdentity,
			logfields.IdentityLabels: identity.Labels.String(),
		}).Info("Identity of endpoint changed")
	}
	e.UpdateLogger(map[string]interface{}{
		logfields.Identity: identity.StringID(),
	})
}

// AnnotationsResolverCB provides an implementation for resolving the pod
// annotations.
type AnnotationsResolverCB func(ns, podName string) (proxyVisibility string, err error)

// UpdateNoTrackRules updates the NOTRACK iptables rules for this endpoint. If anno
// is empty, then any existing NOTRACK rules will be removed. If anno cannot be parsed,
// we also remove any existing NOTRACK rules.
func (e *Endpoint) UpdateNoTrackRules(annoCB AnnotationsResolverCB) {
	ch, err := e.eventQueue.Enqueue(eventqueue.NewEvent(&EndpointNoTrackEvent{
		ep:     e,
		annoCB: annoCB,
	}))
	if err != nil {
		e.getLogger().WithError(err).Error("Unable to enqueue endpoint notrack event")
		return
	}

	updateRes := <-ch
	regenResult, ok := updateRes.(*EndpointRegenerationResult)
	if ok && regenResult.err != nil {
		e.getLogger().WithError(regenResult.err).Error("EndpointNoTrackEvent event failed")
	}
}

// UpdateVisibilityPolicy updates the visibility policy of this endpoint to
// reflect the state stored in the provided proxy visibility annotation. If anno
// is empty, then the VisibilityPolicy for the Endpoint will be empty, and will
// have no effect. If the proxy visibility annotation cannot be parsed, an empty
// visibility policy is assigned to the Endpoint.
func (e *Endpoint) UpdateVisibilityPolicy(annoCB AnnotationsResolverCB) {
	ch, err := e.eventQueue.Enqueue(eventqueue.NewEvent(&EndpointPolicyVisibilityEvent{
		ep:     e,
		annoCB: annoCB,
	}))
	if err != nil {
		e.getLogger().WithError(err).Error("Unable to enqueue endpoint policy visibility event")
		return
	}

	updateRes := <-ch
	regenResult, ok := updateRes.(*EndpointRegenerationResult)
	if ok && regenResult.err != nil {
		e.getLogger().WithError(regenResult.err).Error("EndpointPolicyVisibilityEvent event failed")
	}
}
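
// exampleUpdateNoTrackAndReport is an illustrative sketch (the function name
// is hypothetical) of the enqueue-and-wait pattern shared by the Update*
// helpers in this file, returning the resulting regeneration error to the
// caller instead of only logging it.
func exampleUpdateNoTrackAndReport(e *Endpoint, annoCB AnnotationsResolverCB) error {
	ch, err := e.eventQueue.Enqueue(eventqueue.NewEvent(&EndpointNoTrackEvent{
		ep:     e,
		annoCB: annoCB,
	}))
	if err != nil {
		return fmt.Errorf("unable to enqueue endpoint notrack event: %w", err)
	}
	// Block until the endpoint's event queue has processed the event.
	res, ok := <-ch
	if !ok {
		// The result channel was closed without a result, e.g. because the
		// endpoint is being deleted.
		return nil
	}
	if regenResult, ok := res.(*EndpointRegenerationResult); ok {
		return regenResult.err
	}
	return nil
}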

// UpdateBandwidthPolicy updates the egress bandwidth of this endpoint to
// propagate the throttle rate to the BPF data path.
func (e *Endpoint) UpdateBandwidthPolicy(bwm dptypes.BandwidthManager, annoCB AnnotationsResolverCB) {
	ch, err := e.eventQueue.Enqueue(eventqueue.NewEvent(&EndpointPolicyBandwidthEvent{
		bwm:    bwm,
		ep:     e,
		annoCB: annoCB,
	}))
	if err != nil {
		e.getLogger().WithError(err).Error("Unable to enqueue endpoint policy bandwidth event")
		return
	}

	updateRes := <-ch
	regenResult, ok := updateRes.(*EndpointRegenerationResult)
	if ok && regenResult.err != nil {
		e.getLogger().WithError(regenResult.err).Error("EndpointPolicyBandwidthEvent event failed")
	}
}

// GetRealizedPolicyRuleLabelsForKey returns the list of policy rule labels
// which match a given flow key (in host byte-order). The returned
// LabelArrayList is shallow-copied and therefore must not be mutated.
// This function is explicitly exported to be accessed by code outside of the
// Cilium source code tree and for testing.
func (e *Endpoint) GetRealizedPolicyRuleLabelsForKey(key policyTypes.Key) (
	derivedFrom labels.LabelArrayList,
	revision uint64,
	ok bool,
) {
	e.mutex.RLock()
	defer e.mutex.RUnlock()

	entry, ok := e.realizedPolicy.GetPolicyMap().Get(key)
	if !ok {
		return nil, 0, false
	}

	return entry.DerivedFromRules, e.policyRevision, true
}
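
// exampleLogRuleLabelsForKey is an illustrative sketch (the function name is
// hypothetical) of how an external caller, for example a debugging tool,
// might use GetRealizedPolicyRuleLabelsForKey; the flow key is assumed to
// have been built by the caller in host byte-order.
func exampleLogRuleLabelsForKey(e *Endpoint, key policyTypes.Key) {
	derivedFrom, revision, ok := e.GetRealizedPolicyRuleLabelsForKey(key)
	if !ok {
		// No realized policy map entry exists for this flow key.
		return
	}
	e.getLogger().WithFields(logrus.Fields{
		logfields.PolicyRevision: revision,
		logfields.Labels:         derivedFrom,
	}).Debug("Realized policy rule labels for flow key")
}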