github.com/noironetworks/cilium-net@v1.6.12/pkg/endpoint/policy.go

     1  // Copyright 2016-2020 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package endpoint
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"os"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/cilium/cilium/common/addressing"
    26  	"github.com/cilium/cilium/pkg/completion"
    27  	"github.com/cilium/cilium/pkg/controller"
    28  	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
    29  	"github.com/cilium/cilium/pkg/endpoint/regeneration"
    30  	"github.com/cilium/cilium/pkg/eventqueue"
    31  	identityPkg "github.com/cilium/cilium/pkg/identity"
    32  	"github.com/cilium/cilium/pkg/identity/identitymanager"
    33  	"github.com/cilium/cilium/pkg/ipcache"
    34  	k8sConst "github.com/cilium/cilium/pkg/k8s/apis/cilium.io"
    35  	"github.com/cilium/cilium/pkg/labels"
    36  	"github.com/cilium/cilium/pkg/logging/logfields"
    37  	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
    38  	"github.com/cilium/cilium/pkg/node"
    39  	"github.com/cilium/cilium/pkg/option"
    40  	"github.com/cilium/cilium/pkg/policy"
    41  	"github.com/cilium/cilium/pkg/revert"
    42  
    43  	"github.com/sirupsen/logrus"
    44  )
    45  
    46  // ProxyID returns a unique string to identify a proxy mapping.
    47  func (e *Endpoint) ProxyID(l4 *policy.L4Filter) string {
    48  	return policy.ProxyIDFromFilter(e.ID, l4)
    49  }
    50  
    51  // LookupRedirectPort returns the redirect L4 proxy port for the given L4
    52  // policy map key, in host byte order. Returns 0 if not found or the
    53  // filter doesn't require a redirect.
    54  // Must be called with Endpoint.Mutex held.
    55  func (e *Endpoint) LookupRedirectPort(l4Filter *policy.L4Filter) uint16 {
    56  	if !l4Filter.IsRedirect() {
    57  		return 0
    58  	}
    59  	proxyID := e.ProxyID(l4Filter)
    60  	return e.realizedRedirects[proxyID]
    61  }
    62  
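// exampleRedirectLookup is an illustrative sketch, not part of the original
// file. It shows how the two helpers above fit together: the proxy ID derived
// from an L4 filter keys the endpoint's realizedRedirects map, and a zero
// port means either that the filter needs no redirect or that no redirect has
// been realized yet. The caller is assumed to hold e.Mutex, as documented
// above.
func exampleRedirectLookup(e *Endpoint, l4 *policy.L4Filter) {
	if port := e.LookupRedirectPort(l4); port != 0 {
		e.getLogger().WithFields(logrus.Fields{
			"proxyID":      e.ProxyID(l4),
			"redirectPort": port,
		}).Debug("L4 filter is redirected to a proxy port")
	}
}
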
    63  // Note that this function assumes that endpoint policy has already been generated!
    64  // Must be called with endpoint.Mutex held for reading.
    65  func (e *Endpoint) updateNetworkPolicy(proxyWaitGroup *completion.WaitGroup) (reterr error, revertFunc revert.RevertFunc) {
    66  	// Skip updating the NetworkPolicy if no identity has been computed for this
    67  	// endpoint.
    68  	// This breaks a circular dependency between configuring NetworkPolicies in
    69  	// sidecar Envoy proxies and those proxies needing network connectivity
    70  	// to get their initial configuration, which is required for them to ACK
    71  	// the NetworkPolicies.
    72  	if e.SecurityIdentity == nil {
    73  		return nil, nil
    74  	}
    75  
    76  	// If desired L4Policy is nil then no policy change is needed.
    77  	if e.desiredPolicy == nil || e.desiredPolicy.L4Policy == nil {
    78  		return nil, nil
    79  	}
    80  
    81  	// Publish the updated policy to L7 proxies.
    82  	return e.owner.UpdateNetworkPolicy(e, e.desiredPolicy.L4Policy, proxyWaitGroup)
    83  }
    84  
    85  func (e *Endpoint) useCurrentNetworkPolicy(proxyWaitGroup *completion.WaitGroup) {
    86  	if e.SecurityIdentity == nil {
    87  		return
    88  	}
    89  
    90  	// If desired L4Policy is nil then no policy change is needed.
    91  	if e.desiredPolicy == nil || e.desiredPolicy.L4Policy == nil {
    92  		return
    93  	}
    94  
    95  	// Wait for the current network policy to be acked
    96  	e.owner.UpdateNetworkPolicy(e, e.desiredPolicy.L4Policy, proxyWaitGroup)
    97  }
    98  
    99  // setNextPolicyRevision updates the desired policy revision field
   100  // Must be called with the endpoint lock held for at least reading
   101  func (e *Endpoint) setNextPolicyRevision(revision uint64) {
   102  	e.nextPolicyRevision = revision
   103  	e.UpdateLogger(map[string]interface{}{
   104  		logfields.DesiredPolicyRevision: e.nextPolicyRevision,
   105  	})
   106  }
   107  
   108  // regeneratePolicy computes the policy for the given endpoint based on the
   109  // rules in regeneration.Owner's policy repository.
   110  //
   111  // Policy generation may fail, and in that case we exit before actually changing
   112  // the policy in any way, so that the last policy remains fully in effect if the
   113  // new policy cannot be implemented. This is done on a per-endpoint basis,
   114  // however, and it is possible that policy update succeeds for some endpoints,
   115  // while it fails for other endpoints.
   116  //
   117  // Returns:
   118  //  - err: any error in obtaining information for computing policy, or if
   119  //    policy could not be generated given the current set of rules in the
   120  //    repository.
   121  // Must be called with endpoint mutex held.
   122  func (e *Endpoint) regeneratePolicy() (retErr error) {
   123  	var forceRegeneration bool
   124  
   125  	// No point in calculating policy if endpoint does not have an identity yet.
   126  	if e.SecurityIdentity == nil {
   127  		e.getLogger().Warn("Endpoint lacks identity, skipping policy calculation")
   128  		return nil
   129  	}
   130  
   131  	e.getLogger().Debug("Starting policy recalculation...")
   132  	stats := &policyRegenerationStatistics{}
   133  	stats.totalTime.Start()
   134  
   135  	stats.waitingForPolicyRepository.Start()
   136  	repo := e.owner.GetPolicyRepository()
   137  	repo.Mutex.RLock()
   138  	revision := repo.GetRevision()
   139  	defer repo.Mutex.RUnlock()
   140  	stats.waitingForPolicyRepository.End(true)
   141  
   142  	// Recompute policy for this endpoint only if not already done for this revision.
   143  	if !e.forcePolicyCompute && e.nextPolicyRevision >= revision {
   144  		e.getLogger().WithFields(logrus.Fields{
   145  			"policyRevision.next": e.nextPolicyRevision,
   146  			"policyRevision.repo": revision,
   147  			"policyChanged":       e.nextPolicyRevision > e.policyRevision,
   148  		}).Debug("Skipping unnecessary endpoint policy recalculation")
   149  
   150  		return nil
   151  	}
   152  
   153  	stats.policyCalculation.Start()
   154  	if e.selectorPolicy == nil {
   155  		// Upon initial insertion or restore, there's currently no good
   156  		// trigger point to ensure that the security Identity is
   157  		// assigned after the endpoint is added to the endpointmanager
   158  		// (and hence also the identitymanager). In that case, detect
   159  		// that the selectorPolicy is not set and find it.
   160  		e.selectorPolicy = repo.GetPolicyCache().Lookup(e.SecurityIdentity)
   161  		if e.selectorPolicy == nil {
   162  			err := fmt.Errorf("no cached selectorPolicy found")
   163  			e.getLogger().WithError(err).Warning("Failed to regenerate from cached policy")
   164  			return err
   165  		}
   166  	}
   167  	// TODO: GH-7515: This should be triggered closer to policy change
   168  	// handlers, but for now let's just update it here.
   169  	if err := repo.GetPolicyCache().UpdatePolicy(e.SecurityIdentity); err != nil {
   170  		e.getLogger().WithError(err).Warning("Failed to update policy")
   171  		return err
   172  	}
   173  	calculatedPolicy := e.selectorPolicy.Consume(e)
   174  	stats.policyCalculation.End(true)
   175  
   176  	// This makes e.desiredPolicy diverge from the previously realized policy.
   177  	e.desiredPolicy = calculatedPolicy
   178  
   179  	if e.forcePolicyCompute {
   180  		forceRegeneration = true     // Options were changed by the caller.
   181  		e.forcePolicyCompute = false // Policies just computed
   182  		e.getLogger().Debug("Forced policy recalculation")
   183  	}
   184  
   185  	// Set the revision of this endpoint to the current revision of the policy
   186  	// repository.
   187  	e.setNextPolicyRevision(revision)
   188  
   189  	e.updatePolicyRegenerationStatistics(stats, forceRegeneration, retErr)
   190  
   191  	return nil
   192  }
   193  
   194  func (e *Endpoint) updatePolicyRegenerationStatistics(stats *policyRegenerationStatistics, forceRegeneration bool, err error) {
   195  	success := err == nil
   196  
   197  	stats.totalTime.End(success)
   198  	stats.success = success
   199  
   200  	stats.SendMetrics()
   201  
   202  	fields := logrus.Fields{
   203  		"waitingForIdentityCache":    stats.waitingForIdentityCache,
   204  		"waitingForPolicyRepository": stats.waitingForPolicyRepository,
   205  		"policyCalculation":          stats.policyCalculation,
   206  		"forcedRegeneration":         forceRegeneration,
   207  	}
   208  	scopedLog := e.getLogger().WithFields(fields)
   209  
   210  	if err != nil {
   211  		scopedLog.WithError(err).Warn("Regeneration of policy failed")
   212  		return
   213  	}
   214  
   215  	scopedLog.Debug("Completed endpoint policy recalculation")
   216  }
   217  
   218  // updateAndOverrideEndpointOptions updates the boolean configuration options for the endpoint
   219  // based on policy configuration, daemon policy enforcement mode, and any
   220  // configuration options provided in opts. Returns whether the options changed
   221  // from prior endpoint configuration. Note that the policy which applies
   222  // to the endpoint, as well as the daemon's policy enforcement, may override
   223  // configuration changes which were made via the API that were provided in opts.
   224  // Must be called with endpoint mutex held.
   225  func (e *Endpoint) updateAndOverrideEndpointOptions(opts option.OptionMap) (optsChanged bool) {
   226  	if opts == nil {
   227  		opts = make(option.OptionMap)
   228  	}
   229  	// Apply possible option changes before regenerating maps, as map regeneration
   230  	// depends on the conntrack options
   231  	if e.desiredPolicy != nil && e.desiredPolicy.L4Policy != nil {
   232  		if e.desiredPolicy.L4Policy.RequiresConntrack() {
   233  			opts[option.Conntrack] = option.OptionEnabled
   234  		}
   235  	}
   236  
   237  	optsChanged = e.applyOptsLocked(opts)
   238  	return
   239  }
   240  
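// exampleConntrackOverride is an illustrative sketch, not part of the
// original file. It demonstrates the override semantics documented above:
// even if the caller asks for connection tracking to be disabled, an L4
// policy that requires conntrack forces the option back on. The endpoint
// mutex is assumed to be held, as updateAndOverrideEndpointOptions requires.
func exampleConntrackOverride(e *Endpoint) bool {
	opts := option.OptionMap{option.Conntrack: option.OptionDisabled}
	// The return value reports whether the effective options differ from the
	// endpoint's previous configuration.
	return e.updateAndOverrideEndpointOptions(opts)
}
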
   241  // Called with e.Mutex UNlocked
   242  func (e *Endpoint) regenerate(context *regenerationContext) (retErr error) {
   243  	var revision uint64
   244  	var stateDirComplete bool
   245  	var err error
   246  
   247  	context.Stats = regenerationStatistics{}
   248  	stats := &context.Stats
   249  	stats.totalTime.Start()
   250  	e.getLogger().WithFields(logrus.Fields{
   251  		logfields.StartTime: time.Now(),
   252  		logfields.Reason:    context.Reason,
   253  	}).Debug("Regenerating endpoint")
   254  
   255  	defer func() {
   256  		// This has to be within a func(), not deferred directly, so that the
   257  		// value of retErr is passed in from when regenerate returns.
   258  		e.updateRegenerationStatistics(context, retErr)
   259  	}()
   260  
   261  	e.BuildMutex.Lock()
   262  	defer e.BuildMutex.Unlock()
   263  
   264  	stats.waitingForLock.Start()
   265  	// Check if the endpoint is still alive before doing any build
   266  	err = e.LockAlive()
   267  	stats.waitingForLock.End(err == nil)
   268  	if err != nil {
   269  		return err
   270  	}
   271  
   272  	// When building the initial drop policy in waiting-for-identity state,
   273  	// the state remains unchanged
   274  	//
   275  	// GH-5350: Remove this special case to require checking for StateWaitingForIdentity
   276  	if e.GetStateLocked() != StateWaitingForIdentity &&
   277  		!e.BuilderSetStateLocked(StateRegenerating, "Regenerating endpoint: "+context.Reason) {
   278  		e.getLogger().WithField(logfields.EndpointState, e.state).Debug("Skipping build due to invalid state")
   279  		e.Unlock()
   280  
   281  		return fmt.Errorf("Skipping build due to invalid state: %s", e.state)
   282  	}
   283  
   284  	e.Unlock()
   285  
   286  	stats.prepareBuild.Start()
   287  	origDir := e.StateDirectoryPath()
   288  	context.datapathRegenerationContext.currentDir = origDir
   289  
   290  	// This is the temporary directory to store the generated headers;
   291  	// the original existing directory is not overwritten until the
   292  	// entire generation process has succeeded.
   293  	tmpDir := e.NextDirectoryPath()
   294  	context.datapathRegenerationContext.nextDir = tmpDir
   295  
   296  	// Remove any existing temporary directory that may have been left
   297  	// over, to make sure we can start the build from scratch
   298  	if err := e.removeDirectory(tmpDir); err != nil && !os.IsNotExist(err) {
   299  		stats.prepareBuild.End(false)
   300  		return fmt.Errorf("unable to remove old temporary directory: %s", err)
   301  	}
   302  
   303  	// Create temporary endpoint directory if it does not exist yet
   304  	if err := os.MkdirAll(tmpDir, 0777); err != nil {
   305  		stats.prepareBuild.End(false)
   306  		return fmt.Errorf("Failed to create endpoint directory: %s", err)
   307  	}
   308  
   309  	stats.prepareBuild.End(true)
   310  
   311  	defer func() {
   312  		if err := e.LockAlive(); err != nil {
   313  			if retErr == nil {
   314  				retErr = err
   315  			} else {
   316  				e.LogDisconnectedMutexAction(err, "after regenerate")
   317  			}
   318  			return
   319  		}
   320  
   321  		// Guarantee removal of the temporary directory regardless of the outcome
   322  		// of the build. If the build was successful, the temporary directory will
   323  		// have been moved to a new permanent location. If the build failed,
   324  		// the temporary directory will still exist and we will remove it.
   325  		e.removeDirectory(tmpDir)
   326  
   327  		// Set to Ready, but only if no other changes are pending.
   328  		// State will remain as waiting-to-regenerate if further
   329  		// changes are needed. Another regeneration should already be
   330  		// queued to take care of it.
   331  		e.BuilderSetStateLocked(StateReady, "Completed endpoint regeneration with no pending regeneration requests")
   332  		e.Unlock()
   333  	}()
   334  
   335  	revision, stateDirComplete, err = e.regenerateBPF(context)
   336  	if err != nil {
   337  		failDir := e.FailedDirectoryPath()
   338  		e.getLogger().WithFields(logrus.Fields{
   339  			logfields.Path: failDir,
   340  		}).Warn("generating BPF for endpoint failed, keeping stale directory.")
   341  
   342  		// Remove any previously existing failure directory
   343  		e.removeDirectory(failDir)
   344  		os.Rename(tmpDir, failDir)
   345  		return err
   346  	}
   347  
   348  	return e.updateRealizedState(stats, origDir, revision, stateDirComplete)
   349  }
   350  
   351  // updateRealizedState sets any realized state fields within the endpoint to
   352  // be the desired state of the endpoint. This is only called after a successful
   353  // regeneration of the endpoint.
   354  func (e *Endpoint) updateRealizedState(stats *regenerationStatistics, origDir string, revision uint64, stateDirComplete bool) error {
   355  	// Update desired policy for endpoint because policy has now been realized
   356  	// in the datapath. PolicyMap state is not updated here, because that is
   357  	// performed in endpoint.syncPolicyMap().
   358  	stats.waitingForLock.Start()
   359  	err := e.LockAlive()
   360  	stats.waitingForLock.End(err == nil)
   361  	if err != nil {
   362  		return err
   363  	}
   364  
   365  	defer e.Unlock()
   366  
   367  	// Depending upon result of BPF regeneration (compilation executed),
   368  	// shift endpoint directories to match said BPF regeneration
   369  	// results.
   370  	err = e.synchronizeDirectories(origDir, stateDirComplete)
   371  	if err != nil {
   372  		return fmt.Errorf("error synchronizing endpoint BPF program directories: %s", err)
   373  	}
   374  
   375  	// Keep PolicyMap for this endpoint in sync with desired / realized state.
   376  	if !option.Config.DryMode {
   377  		e.syncPolicyMapController()
   378  	}
   379  
   380  	e.realizedBPFConfig = e.desiredBPFConfig
   381  
   382  	// Set realized state to desired state.
   383  	e.realizedPolicy = e.desiredPolicy
   384  
   385  	// Mark the endpoint to be running the policy revision it was
   386  	// compiled for
   387  	e.setPolicyRevision(revision)
   388  
   389  	return nil
   390  }
   391  
   392  func (e *Endpoint) updateRegenerationStatistics(context *regenerationContext, err error) {
   393  	success := err == nil
   394  	stats := &context.Stats
   395  
   396  	stats.totalTime.End(success)
   397  	stats.success = success
   398  
   399  	e.mutex.RLock()
   400  	stats.endpointID = e.ID
   401  	stats.policyStatus = e.policyStatus()
   402  	e.RUnlock()
   403  	stats.SendMetrics()
   404  
   405  	fields := logrus.Fields{
   406  		logfields.Reason: context.Reason,
   407  	}
   408  	for field, stat := range stats.GetMap() {
   409  		fields[field] = stat.Total()
   410  	}
   411  	for field, stat := range stats.datapathRealization.GetMap() {
   412  		fields[field] = stat.Total()
   413  	}
   414  	scopedLog := e.getLogger().WithFields(fields)
   415  
   416  	if err != nil {
   417  		scopedLog.WithError(err).Warn("Regeneration of endpoint failed")
   418  		e.LogStatus(BPF, Failure, "Error regenerating endpoint: "+err.Error())
   419  		return
   420  	}
   421  
   422  	scopedLog.Debug("Completed endpoint regeneration")
   423  	e.LogStatusOK(BPF, "Successfully regenerated endpoint program (Reason: "+context.Reason+")")
   424  }
   425  
   426  // RegenerateIfAlive queues a regeneration of this endpoint into the build queue
   427  // of the endpoint and returns a channel that is closed when the regeneration of
   428  // the endpoint is complete. The channel returns:
   429  //  - false if the regeneration failed
   430  //  - true if the regeneration succeeded
   431  //  - nothing, and the channel is simply closed, if the regeneration did not happen
   432  func (e *Endpoint) RegenerateIfAlive(regenMetadata *regeneration.ExternalRegenerationMetadata) <-chan bool {
   433  	if err := e.LockAlive(); err != nil {
   434  		log.WithError(err).Warnf("Endpoint disappeared while queued to be regenerated: %s", regenMetadata.Reason)
   435  		e.LogStatus(Policy, Failure, "Error while handling policy updates for endpoint: "+err.Error())
   436  	} else {
   437  		var regen bool
   438  		state := e.GetStateLocked()
   439  		switch state {
   440  		case StateRestoring, StateWaitingToRegenerate:
   441  			e.SetStateLocked(state, fmt.Sprintf("Skipped duplicate endpoint regeneration trigger due to %s", regenMetadata.Reason))
   442  			regen = false
   443  		default:
   444  			regen = e.SetStateLocked(StateWaitingToRegenerate, fmt.Sprintf("Triggering endpoint regeneration due to %s", regenMetadata.Reason))
   445  		}
   446  		e.Unlock()
   447  		if regen {
   448  			// Regenerate logs status according to the build success/failure
   449  			return e.Regenerate(regenMetadata)
   450  		}
   451  	}
   452  
   453  	ch := make(chan bool)
   454  	close(ch)
   455  	return ch
   456  }
   457  
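// exampleWaitForRegeneration is an illustrative sketch, not part of the
// original file. It shows how a caller might consume the channel returned by
// RegenerateIfAlive: the channel carries at most one value and is then
// closed, and a close without a value means the regeneration never ran.
func exampleWaitForRegeneration(e *Endpoint, regenMetadata *regeneration.ExternalRegenerationMetadata) bool {
	buildSuccess, ok := <-e.RegenerateIfAlive(regenMetadata)
	if !ok {
		// Channel closed without a value: the regeneration was skipped, e.g.
		// because the endpoint disappeared or another regeneration was
		// already queued.
		return false
	}
	return buildSuccess
}
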
   458  // Regenerate forces the regeneration of endpoint programs & policy
   459  // Should only be called with e.state == StateWaitingToRegenerate or with
   460  // e.state == StateWaitingForIdentity
   461  func (e *Endpoint) Regenerate(regenMetadata *regeneration.ExternalRegenerationMetadata) <-chan bool {
   462  	done := make(chan bool, 1)
   463  
   464  	var (
   465  		ctx   context.Context
   466  		cFunc context.CancelFunc
   467  	)
   468  
   469  	if regenMetadata.ParentContext != nil {
   470  		ctx, cFunc = context.WithCancel(regenMetadata.ParentContext)
   471  	} else {
   472  		ctx, cFunc = context.WithCancel(context.Background())
   473  	}
   474  
   475  	regenContext := ParseExternalRegenerationMetadata(ctx, cFunc, regenMetadata)
   476  
   477  	epEvent := eventqueue.NewEvent(&EndpointRegenerationEvent{
   478  		regenContext: regenContext,
   479  		ep:           e,
   480  	})
   481  
   482  	// This may block if the Endpoint's EventQueue is full. This has to be done
   483  	// synchronously as some callers depend on the fact that the event is
   484  	// synchronously enqueued.
   485  	resChan, err := e.EventQueue.Enqueue(epEvent)
   486  	if err != nil {
   487  		e.getLogger().Errorf("enqueue of EndpointRegenerationEvent failed: %s", err)
   488  		done <- false
   489  		close(done)
   490  		return done
   491  	}
   492  
   493  	go func() {
   494  
   495  		// Free up resources with context.
   496  		defer cFunc()
   497  
   498  		var (
   499  			buildSuccess bool
   500  			regenError   error
   501  			canceled     bool
   502  		)
   503  
   504  		select {
   505  		case result, ok := <-resChan:
   506  			if ok {
   507  				regenResult := result.(*EndpointRegenerationResult)
   508  				regenError = regenResult.err
   509  				buildSuccess = regenError == nil
   510  
   511  				if regenError != nil {
   512  					e.getLogger().WithError(regenError).Error("endpoint regeneration failed")
   513  				}
   514  			} else {
   515  				// This may be unnecessary(?) since 'closing' of the results
   516  				// channel means that event has been cancelled?
   517  				e.getLogger().Debug("regeneration was cancelled")
   518  				canceled = true
   519  			}
   520  		}
   521  
   522  		// If a build is canceled, that means that the Endpoint is being deleted,
   523  		// not that the build failed.
   524  		if !buildSuccess && !canceled {
   525  			select {
   526  			case e.regenFailedChan <- struct{}{}:
   527  			default:
   528  				// If we can't write to the channel, that means that it is
   529  				// full / a regeneration will occur - we don't have to
   530  				// do anything.
   531  			}
   532  		}
   533  		done <- buildSuccess
   534  		close(done)
   535  	}()
   536  
   537  	return done
   538  }
   539  
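// exampleTriggerRegeneration is an illustrative sketch, not part of the
// original file. It shows the typical shape of a Regenerate call: build the
// metadata (the reason string and regeneration level here are hypothetical),
// move the endpoint into StateWaitingToRegenerate as the doc comment above
// requires, then block on the result channel.
func exampleTriggerRegeneration(ctx context.Context, e *Endpoint) error {
	if err := e.LockAlive(); err != nil {
		return err
	}
	ready := e.SetStateLocked(StateWaitingToRegenerate, "example: configuration changed")
	e.Unlock()
	if !ready {
		// Another regeneration is already queued; nothing to do.
		return nil
	}
	if success := <-e.Regenerate(&regeneration.ExternalRegenerationMetadata{
		ParentContext:     ctx,
		Reason:            "example: configuration changed",
		RegenerationLevel: regeneration.RegenerateWithDatapathRewrite,
	}); !success {
		return fmt.Errorf("regeneration failed")
	}
	return nil
}
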
   540  var reasonRegenRetry = "retrying regeneration"
   541  
   542  // StartRegenerationFailureHandler is a wrapper around
   543  // startRegenerationFailureHandler; it was created for the backports
   544  // of an upstream commit.
   545  func (e *Endpoint) StartRegenerationFailureHandler() {
   546  	e.startRegenerationFailureHandler()
   547  }
   548  
   549  // startRegenerationFailureHandler waits for a build of the Endpoint to fail.
   550  // Terminates when the given Endpoint is deleted.
   551  // If a build fails, the controller tries to regenerate the
   552  // Endpoint until it succeeds. Once the controller succeeds, it will not be
   553  // run again unless another build failure occurs. If the call to `Regenerate`
   554  // fails inside of the controller, the controller retries it using ErrorRetryBaseDuration.
   555  func (e *Endpoint) startRegenerationFailureHandler() {
   556  	e.controllers.UpdateController(fmt.Sprintf("endpoint-%s-regeneration-recovery", e.StringID()), controller.ControllerParams{
   557  		DoFunc: func(ctx context.Context) error {
   558  			select {
   559  			case <-e.regenFailedChan:
   560  				e.getLogger().Debug("received signal that regeneration failed")
   561  			case <-ctx.Done():
   562  				e.getLogger().Debug("exiting retrying regeneration goroutine due to endpoint being deleted")
   563  				return nil
   564  			}
   565  
   566  			if err := e.LockAlive(); err != nil {
   567  				// We don't need to regenerate because the endpoint is
   568  				// disconnecting / is disconnected, exit gracefully.
   569  				return nil
   570  			}
   571  
   572  			stateTransitionSucceeded := e.SetStateLocked(StateWaitingToRegenerate, reasonRegenRetry)
   573  			e.Unlock()
   574  			if !stateTransitionSucceeded {
   575  				// Another regeneration has already been enqueued.
   576  				return nil
   577  			}
   578  
   579  			r := &regeneration.ExternalRegenerationMetadata{
   580  				// TODO (ianvernon) - is there a way we can plumb a parent
   581  				// context to a controller (e.g., endpoint.aliveCtx)?
   582  				ParentContext: ctx,
   583  				Reason:        reasonRegenRetry,
   584  				// Completely rewrite the endpoint - we don't know the nature
   585  				// of the failure, simply that something failed.
   586  				RegenerationLevel: regeneration.RegenerateWithDatapathRewrite,
   587  			}
   588  			if success := <-e.Regenerate(r); success {
   589  				return nil
   590  			}
   591  			return fmt.Errorf("regeneration recovery failed")
   592  		},
   593  		ErrorRetryBaseDuration: 2 * time.Second,
   594  	})
   595  }
   596  
   597  func (e *Endpoint) notifyEndpointRegeneration(err error) {
   598  	repr, reprerr := monitorAPI.EndpointRegenRepr(e, err)
   599  	if reprerr != nil {
   600  		e.getLogger().WithError(reprerr).Warn("Notifying monitor about endpoint regeneration failed")
   601  	}
   602  
   603  	if err != nil {
   604  		if reprerr == nil && !option.Config.DryMode {
   605  			e.owner.SendNotification(monitorAPI.AgentNotifyEndpointRegenerateFail, repr)
   606  		}
   607  	} else {
   608  		if reprerr == nil && !option.Config.DryMode {
   609  			e.owner.SendNotification(monitorAPI.AgentNotifyEndpointRegenerateSuccess, repr)
   610  		}
   611  	}
   612  }
   613  
   614  // FormatGlobalEndpointID returns the global ID of the endpoint in the format
   615  // <global ID prefix>:<address space>:<node name>:<endpoint ID> as a string.
   616  func (e *Endpoint) FormatGlobalEndpointID() string {
   617  	localNodeName := node.GetName()
   618  	metadata := []string{endpointid.CiliumGlobalIdPrefix.String(), ipcache.AddressSpace, localNodeName, strconv.Itoa(int(e.ID))}
   619  	return strings.Join(metadata, ":")
   620  }
   621  
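// Illustrative note, not part of the original file: FormatGlobalEndpointID
// joins four components with ":" as shown in the function above, i.e.
//
//	<global ID prefix>:<address space>:<node name>:<endpoint ID>
//
// Assuming (not verified here) a prefix of "cilium-global" and the default
// address space "default", endpoint 42 on a node named "node-1" would yield
// something like "cilium-global:default:node-1:42".
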
   622  // runIPIdentitySync synchronizes the key-value store with a mapping from the
   623  // endpoint's IP to the numerical ID representing its security identity.
   624  func (e *Endpoint) runIPIdentitySync(endpointIP addressing.CiliumIP) {
   625  	if option.Config.KVStore == "" || !endpointIP.IsSet() {
   626  		return
   627  	}
   628  
   629  	addressFamily := endpointIP.GetFamilyString()
   630  
   631  	e.controllers.UpdateController(fmt.Sprintf("sync-%s-identity-mapping (%d)", addressFamily, e.ID),
   632  		controller.ControllerParams{
   633  			DoFunc: func(ctx context.Context) error {
   634  				if err := e.RLockAlive(); err != nil {
   635  					return controller.NewExitReason("Endpoint disappeared")
   636  				}
   637  
   638  				if e.SecurityIdentity == nil {
   639  					e.RUnlock()
   640  					return nil
   641  				}
   642  
   643  				IP := endpointIP.IP()
   644  				ID := e.SecurityIdentity.ID
   645  				hostIP := node.GetExternalIPv4()
   646  				key := node.GetIPsecKeyIdentity()
   647  				metadata := e.FormatGlobalEndpointID()
   648  
   649  				// Release lock as we do not want to have long-lasting key-value
   650  				// store operations resulting in lock being held for a long time.
   651  				e.RUnlock()
   652  
   653  				if err := ipcache.UpsertIPToKVStore(ctx, IP, hostIP, ID, key, metadata); err != nil {
   654  					return fmt.Errorf("unable to add endpoint IP mapping '%s'->'%d': %s", IP.String(), ID, err)
   655  				}
   656  				return nil
   657  			},
   658  			StopFunc: func(ctx context.Context) error {
   659  				ip := endpointIP.String()
   660  				if err := ipcache.DeleteIPFromKVStore(ctx, ip); err != nil {
   661  					return fmt.Errorf("unable to delete endpoint IP '%s' from ipcache: %s", ip, err)
   662  				}
   663  				return nil
   664  			},
   665  			RunInterval: 5 * time.Minute,
   666  		},
   667  	)
   668  }
   669  
   670  // SetIdentity resets the endpoint's policy identity to 'identity'.
   671  // Caller triggers policy regeneration if needed.
   672  // Called with e.Mutex Locked
   673  func (e *Endpoint) SetIdentity(identity *identityPkg.Identity, newEndpoint bool) {
   674  
   675  	// Set a boolean flag to indicate whether the endpoint has been injected by
   676  	// Istio with a Cilium-compatible sidecar proxy.
   677  	istioSidecarProxyLabel, found := identity.Labels[k8sConst.PolicyLabelIstioSidecarProxy]
   678  	e.hasSidecarProxy = found &&
   679  		istioSidecarProxyLabel.Source == labels.LabelSourceK8s &&
   680  		strings.ToLower(istioSidecarProxyLabel.Value) == "true"
   681  
   682  	oldIdentity := "no identity"
   683  	if e.SecurityIdentity != nil {
   684  		oldIdentity = e.SecurityIdentity.StringID()
   685  	}
   686  
   687  	// Current security identity for endpoint is its old identity - delete its
   688  	// reference from the global identity manager, and add a reference to the new
   689  	// identity for the endpoint.
   690  	if newEndpoint {
   691  		identitymanager.Add(identity)
   692  	} else {
   693  		identitymanager.RemoveOldAddNew(e.SecurityIdentity, identity)
   694  	}
   695  	e.SecurityIdentity = identity
   696  	e.replaceIdentityLabels(identity.Labels)
   697  
   698  	// Clear selectorPolicy. It will be determined at next regeneration.
   699  	e.selectorPolicy = nil
   700  
   701  	// Set endpoint state to ready if it was waiting for an identity
   702  	if e.GetStateLocked() == StateWaitingForIdentity {
   703  		e.SetStateLocked(StateReady, "Set identity for this endpoint")
   704  	}
   705  
   706  	// Whenever the identity is updated, propagate change to key-value store
   707  	// of IP to identity mapping.
   708  	e.runIPIdentitySync(e.IPv4)
   709  	e.runIPIdentitySync(e.IPv6)
   710  
   711  	if oldIdentity != identity.StringID() {
   712  		e.getLogger().WithFields(logrus.Fields{
   713  			logfields.Identity:       identity.StringID(),
   714  			logfields.OldIdentity:    oldIdentity,
   715  			logfields.IdentityLabels: identity.Labels.String(),
   716  		}).Info("Identity of endpoint changed")
   717  	}
   718  	e.UpdateLogger(map[string]interface{}{
   719  		logfields.Identity: identity.StringID(),
   720  	})
   721  }
   722  
   723  // GetCIDRPrefixLengths returns the sorted list of unique prefix lengths used
   724  // for CIDR policy or IPcache lookup from this endpoint.
   725  func (e *Endpoint) GetCIDRPrefixLengths() (s6, s4 []int) {
   726  	if e.desiredPolicy == nil || e.desiredPolicy.CIDRPolicy == nil {
   727  		return policy.GetDefaultPrefixLengths()
   728  	}
   729  	return e.desiredPolicy.CIDRPolicy.ToBPFData()
   730  }