github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/endpointsynchronizer.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package endpointmanager
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  
    12  	"github.com/blang/semver/v4"
    13  	"github.com/cilium/hive/cell"
    14  	"github.com/sirupsen/logrus"
    15  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    16  	meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    17  	k8stypes "k8s.io/apimachinery/pkg/types"
    18  
    19  	"github.com/cilium/cilium/pkg/controller"
    20  	"github.com/cilium/cilium/pkg/endpoint"
    21  	"github.com/cilium/cilium/pkg/k8s"
    22  	cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    23  	"github.com/cilium/cilium/pkg/k8s/client"
    24  	v2 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2"
    25  	k8sversion "github.com/cilium/cilium/pkg/k8s/version"
    26  	pkgLabels "github.com/cilium/cilium/pkg/labels"
    27  	"github.com/cilium/cilium/pkg/logging/logfields"
    28  	"github.com/cilium/cilium/pkg/node"
    29  	"github.com/cilium/cilium/pkg/node/types"
    30  	"github.com/cilium/cilium/pkg/option"
    31  	"github.com/cilium/cilium/pkg/time"
    32  )
    33  
const (
	// subsysEndpointSync is the value for logfields.LogSubsys. In this file
	// it is the subsystem name handed to Endpoint.Logger when building the
	// scoped loggers for the CEP sync controllers.
	subsysEndpointSync = "endpointsynchronizer"
)

// ciliumEndpointToK8sSyncControllerGroup is the controller group assigned to
// every controller registered from this file: both the periodic CEP sync
// controller and the delete-only controller that replaces it.
var ciliumEndpointToK8sSyncControllerGroup = controller.NewGroup("sync-to-k8s-ciliumendpoint")
    40  
// EndpointSynchronizer wraps around syncing of CiliumEndpoint resources to
// Kubernetes. Its only state is the Kubernetes clientset its methods use.
type EndpointSynchronizer struct {
	// Clientset is consulted via IsEnabled() before any controller is
	// installed, and supplies the CiliumV2 client used to get, create, patch
	// and delete CiliumEndpoint objects.
	Clientset client.Clientset
}
    46  
// RunK8sCiliumEndpointSync starts a controller that synchronizes the endpoint
// to the corresponding k8s CiliumEndpoint CRD. It is expected that each CEP
// has 1 controller that updates it, and a local copy is retained and only
// updates are pushed up.
// CiliumEndpoint objects have the same name as the pod they represent.
//
// No controller is installed, and h is marked as stopped, when any of the
// following holds: the CiliumEndpoint CRD is disabled in the agent
// configuration, the k8s clientset is disabled, e carries the cilium-health
// label, or e has no CEP name.
//
// Otherwise a controller with a 10 second run interval is registered on e.
// Each run builds the endpoint's CiliumEndpointStatus and:
//   - on the first run (or after a failure that set needInit), fetches the
//     CEP, creating it if it does not exist, and records the CEP UID on the
//     endpoint;
//   - skips the write when the status equals the last one known to be stored;
//   - otherwise replaces /status with a JSON patch guarded by a test on
//     /metadata/uid.
//
// The controller's StopFunc calls deleteCEP for the endpoint.
func (epSync *EndpointSynchronizer) RunK8sCiliumEndpointSync(e *endpoint.Endpoint, h cell.Health) {
	var (
		endpointID     = e.ID
		controllerName = endpoint.EndpointSyncControllerName(endpointID)
		scopedLog      = e.Logger(subsysEndpointSync).WithFields(logrus.Fields{
			"controller": controllerName,
			"endpointID": e.ID,
		})
	)

	if option.Config.DisableCiliumEndpointCRD {
		h.Stopped("ciliumendpoint CRD disabled")
		scopedLog.Debug("Not running controller. CEP CRD synchronization is disabled")
		return
	}

	if !epSync.Clientset.IsEnabled() {
		h.Stopped("k8s client-set disabled")
		scopedLog.Debug("Not starting controller because k8s is disabled")
		return
	}

	ciliumClient := epSync.Clientset.CiliumV2()

	// The health endpoint doesn't really exist in k8s and updates to it caused
	// arbitrary errors. Disable the controller for these endpoints.
	if isHealthEP := e.HasLabels(pkgLabels.LabelHealth); isHealthEP {
		h.Stopped("Cilium health endpoint has no CEP object for k8s sync")
		scopedLog.Debug("Not starting unnecessary CEP controller for cilium-health endpoint")
		return
	}

	// The CEP name is derived from the K8sPodName and K8sNamespace.
	// They should always be available if an endpoint belongs to a pod.
	cepName := e.GetK8sCEPName()
	if cepName == "" {
		h.Stopped("Endpoint synchronizer stopped due to missing CEP metadata")
		scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s cep name")
		return
	}

	// State shared across controller runs. It is captured by the DoFunc
	// closure below and mutated only from inside it.
	var (
		lastMdl  *cilium_v2.EndpointStatus
		localCEP *cilium_v2.CiliumEndpoint // the local copy of the CEP object. Reused.
		needInit = true                    // needInit indicates that we may need to create the CEP
		firstTry = true                    // Try to get CEP from k8s cache
	)

	// NOTE: The controller functions do NOT hold the endpoint locks
	e.UpdateController(controllerName,
		controller.ControllerParams{
			Group:       ciliumEndpointToK8sSyncControllerGroup,
			RunInterval: 10 * time.Second,
			Health:      h,
			DoFunc: func(ctx context.Context) (err error) {
				// Update logger as scopeLog might not have the podName when it
				// was created. Note that this reassigns the captured scopedLog,
				// which the StopFunc below also uses.
				scopedLog = e.Logger(subsysEndpointSync).WithField("controller", controllerName)

				// A zero-value version is treated as "apiserver not reachable
				// yet"; returning an error lets the controller try again later.
				if k8sversion.Version().Equals(semver.Version{}) {
					return fmt.Errorf("Kubernetes apiserver is not available")
				}

				cepOwner := e.GetCEPOwner()
				if cepOwner.IsNil() {
					scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s namespace")
					return nil
				}

				if !e.HaveK8sMetadata() {
					scopedLog.Debug("Skipping CiliumEndpoint update because k8s metadata is not yet available")
					return nil
				}

				identity, err := e.GetSecurityIdentity()
				if err != nil {
					return err
				}
				if identity == nil {
					scopedLog.Debug("Skipping CiliumEndpoint update because security identity is not yet available")
					return nil
				}

				// Serialize the endpoint into a model. It is compared with the one
				// from before, only updating on changes.
				mdl := e.GetCiliumEndpointStatus()
				if !needInit && mdl.DeepEqual(lastMdl) {
					scopedLog.Debug("Skipping CiliumEndpoint update because it has not changed")
					return nil
				}

				if needInit {
					state := e.GetState()
					// Don't bother to create if the
					// endpoint is already disconnecting
					if state == endpoint.StateDisconnecting ||
						state == endpoint.StateDisconnected {
						return nil
					}

					scopedLog.Debug("Getting CEP during an initialization")
					if firstTry {
						// First we try getting CEP from the API server cache, as it's cheaper.
						// If it fails we get it from etcd to be sure to have fresh data.
						// firstTry is cleared unconditionally, so every later init
						// attempt takes the uncached path below.
						localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{ResourceVersion: "0"})
						firstTry = false
					} else {
						localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{})
					}
					// It's only an error if it exists but something else happened
					switch {
					case err == nil:
						// Backfill the CEP UID as we need to do if the CEP was
						// created on an agent version that did not yet store the
						// UID at CEP create time.
						if err := updateCEPUID(scopedLog, e, localCEP); err != nil {
							scopedLog.WithError(err).Warn("could not take ownership of existing ciliumendpoint")
							return err
						}
					case k8serrors.IsNotFound(err):
						// We can't create localCEP directly, it must come from the k8s
						// server via an API call.
						cep := &cilium_v2.CiliumEndpoint{
							ObjectMeta: meta_v1.ObjectMeta{
								Name: cepName,
								OwnerReferences: []meta_v1.OwnerReference{
									{
										APIVersion: cepOwner.GetAPIVersion(),
										Kind:       cepOwner.GetKind(),
										Name:       cepOwner.GetName(),
										UID:        cepOwner.GetUID(),
									},
								},
								// Mirror the labels of parent pod in CiliumEndpoint object to enable
								// label based selection for CiliumEndpoints.
								Labels: cepOwner.GetLabels(),
							},
							Status: *mdl,
						}
						localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Create(ctx, cep, meta_v1.CreateOptions{})
						if err != nil {
							// Suppress logging an error if ep backing the pod was terminated
							// before CEP could be created and shut down the controller.
							if errors.Is(err, context.Canceled) {
								return nil
							}

							scopedLog.WithError(err).Error("Cannot create CEP")
							return err
						}

						scopedLog.WithField(logfields.CEPUID, localCEP.UID).Debug("storing CEP UID after create")
						e.SetCiliumEndpointUID(localCEP.UID)

						// continue the execution so we update the endpoint
						// status immediately upon endpoint creation
					default:
						scopedLog.WithError(err).Warn("Error getting CEP")
						return err
					}

					// We return earlier for all error cases so we don't need
					// to init the local endpoint in non-error cases.
					needInit = false
					lastMdl = &localCEP.Status
					// We still need to update the CEP if localCEP is out of sync with upstream.
					// We only return if upstream is NOT out-of-sync here.
					if mdl.DeepEqual(lastMdl) {
						scopedLog.Debug("Skipping CiliumEndpoint update because it has not changed")
						return nil
					}
				}
				// We have no localCEP copy. We need to fetch it for updates, below.
				// This is unexpected as there should be only 1 writer per CEP, this
				// controller, and the localCEP created on startup will be used.
				if localCEP == nil {
					localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{})
					switch {
					case err == nil:
						// Backfill the CEP UID as we need to do if the CEP was
						// created on an agent version that did not yet store the
						// UID at CEP create time.
						if err := updateCEPUID(scopedLog, e, localCEP); err != nil {
							scopedLog.WithError(err).Warn("could not take ownership of existing ciliumendpoint")
							return err
						}

					// The CEP doesn't exist in k8s. This is unexpected but may occur
					// if the endpoint was removed from k8s but not yet within the agent.
					// Mark the CEP for creation on the next controller iteration. This
					// may never occur if the controller is stopped on Endpoint delete.
					case k8serrors.IsNotFound(err):
						needInit = true
						return err

					// We cannot read the upstream CEP. Setting needInit sends the next
					// iteration back through the init path above (Get, then Create if
					// the CEP is not found). This is an unexpected situation.
					case k8serrors.IsInvalid(err):
						scopedLog.WithError(err).Warn("Invalid CEP during update")
						needInit = true
						return nil

					// A real error
					default:
						scopedLog.WithError(err).Error("Cannot get CEP during update")
						return err
					}
				}

				// For json patch we don't need to perform a GET for endpoints

				// The patch is two operations: a "test" on the CEP UID followed by
				// a "replace" of the whole status with the freshly built model.
				replaceCEPStatus := []k8s.JSONPatch{
					// If the stored UID matches the one in the ciliumendpoint then
					// this first patch is a no-op, otherwise the test fails and the
					// entire patch, including the status replace, is not applied.
					{
						OP:    "test",
						Path:  "/metadata/uid",
						Value: e.GetCiliumEndpointUID(),
					},
					{
						OP:    "replace",
						Path:  "/status",
						Value: mdl,
					},
				}
				var createStatusPatch []byte
				createStatusPatch, err = json.Marshal(replaceCEPStatus)
				if err != nil {
					return err
				}

				localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Patch(
					ctx, cepName,
					k8stypes.JSONPatchType,
					createStatusPatch,
					meta_v1.PatchOptions{})

				// Handle Update errors or return successfully
				switch {
				// Return no error when we see a conflict, so that the run is not
				// reported as failed (presumably to avoid the controller's error
				// backoff). needInit makes the next run re-fetch the CEP.
				case err != nil && k8serrors.IsConflict(err):
					scopedLog.WithError(err).Warn("Cannot update CEP due to a revision conflict. The next controller execution will try again")
					needInit = true
					return nil

				// Ensure we re-init when we see a generic error. The next run
				// re-fetches the CEP and recreates it if it no longer exists.
				case err != nil:
					// Suppress logging an error if ep backing the pod was terminated
					// before CEP could be updated and shut down the controller.
					if errors.Is(err, context.Canceled) {
						return nil
					}
					scopedLog.WithError(err).Error("Cannot update CEP")

					needInit = true
					return err

				// A successful update means no more updates unless the endpoint status, aka mdl, changes
				default:
					lastMdl = mdl
					return nil
				}
			},
			// Remove the CEP from Kubernetes when the controller is stopped.
			StopFunc: func(ctx context.Context) error {
				return deleteCEP(ctx, scopedLog, ciliumClient, e)
			},
		})
}
   326  
   327  // updateCEPUID attempts to update the endpoints UID to be that of localCEP.
   328  // This in effect takes ownership of the referenced CEP, thus we can only
   329  // do this if it is safe to do so. Otherwise an error is returned.
   330  //
   331  // One caveat is that, although endpoints are now restored to reference their
   332  // previous CEP, this has to handle cases where agent was upgraded from a version
   333  // that did not store CEP UIDs in the restore state header.
   334  // It is only safe to do so if the CEP is local.
   335  //
   336  // In all cases where the endpoint cannot take ownership of a CEP, it is assumed
   337  // that this is a temporary state where either the local/remote agent managing the CEP
   338  // is shutting down and will delete the CEP, or the CEP is stale and needs to be cleaned
   339  // up by the operator.
   340  func updateCEPUID(scopedLog *logrus.Entry, e *endpoint.Endpoint, localCEP *cilium_v2.CiliumEndpoint) error {
   341  	// It's possible we already own this CEP, as in the case of a restore after restart.
   342  	// If the Endpoint already owns the CEP (by holding the matching CEP UID reference) then we don't have to
   343  	// worry about other ownership checks.
   344  	//
   345  	// This will cover cases such as if the NodeIP changes (as with a reboot).
   346  	// In which case we can safely take ownership and overwrite the CEPs
   347  	// status. However if the cilium endpoints are lost on restart (eg the
   348  	// state files were previously checkpointed into tmpfs) this check will
   349  	// fail and we will rely on the next check to prevent us from hijacking
   350  	// CEPs.
   351  	cepUID := e.GetCiliumEndpointUID()
   352  	if cepUID == localCEP.UID {
   353  		return nil
   354  	}
   355  
   356  	// We do not want to take ownership of CEPs created on other Nodes.
   357  	// However we can't directly compare the CEP node ip with the node, because
   358  	// the node ip can change, orphaning the CEP. So we retrieve the pod for
   359  	// the CEP and compare its node IP with that of the node. The kubelet on
   360  	// this node will update the pod object appropriately, allowing this check
   361  	// to eventually go through.
   362  	//
   363  	// The intent here is to check if a given pod is running on the same node
   364  	// this cilium is running on before taking over its CEP.
   365  	cepOwner := e.GetCEPOwner()
   366  	if cepOwner.IsNil() {
   367  		return fmt.Errorf("endpoint sync cannot take ownership of CEP: no pod")
   368  	}
   369  	podHostIP := cepOwner.GetHostIP()
   370  	if podHostIP == "" {
   371  		return fmt.Errorf("endpoint sync cannot take ownership of CEP: no pod HostIP")
   372  	}
   373  	if nodeIP := node.GetIPv4().String(); podHostIP != nodeIP {
   374  		// Also checking node ipv6 for k8s dual stack with ipv6 preference where
   375  		// podHostIP is gonna be node ipv6
   376  		if nodeIPV6 := node.GetIPv6().String(); podHostIP != nodeIPV6 {
   377  			return fmt.Errorf("endpoint sync cannot take ownership of CEP that is not local: CEP's pod %q, pod's hostIP %q, cilium nodeIP %q)",
   378  				e.GetK8sPodName(), podHostIP, nodeIP)
   379  		}
   380  	}
   381  
   382  	// If the endpoint has a CEP UID, which does not match the current CEP, we cannot take
   383  	// ownership.
   384  	if cepUID != "" && cepUID != localCEP.GetUID() {
   385  		return fmt.Errorf("endpoint sync could not take ownership of CEP %q, endpoint UID (%q) did not match CEP UID: %q",
   386  			localCEP.GetNamespace()+"/"+localCEP.GetName(), cepUID, localCEP.GetUID())
   387  	}
   388  
   389  	if cepUID := e.GetCiliumEndpointUID(); cepUID == "" {
   390  		scopedLog.WithFields(logrus.Fields{
   391  			logfields.Node:           types.GetName(),
   392  			"old" + logfields.CEPUID: cepUID,
   393  			logfields.CEPUID:         localCEP.UID,
   394  		}).Debug("updating CEP UID and syncing endpoint header file")
   395  		e.SetCiliumEndpointUID(localCEP.UID)
   396  		e.SyncEndpointHeaderFile()
   397  	}
   398  	return nil
   399  }
   400  
   401  // DeleteK8sCiliumEndpointSync replaces the endpoint controller to remove the
   402  // CEP from Kubernetes once the endpoint is stopped / removed from the
   403  // Cilium agent.
   404  func (epSync *EndpointSynchronizer) DeleteK8sCiliumEndpointSync(e *endpoint.Endpoint) {
   405  	controllerName := endpoint.EndpointSyncControllerName(e.ID)
   406  
   407  	scopedLog := e.Logger(subsysEndpointSync).WithField("controller", controllerName)
   408  
   409  	if !epSync.Clientset.IsEnabled() {
   410  		scopedLog.Debug("Not starting controller because k8s is disabled")
   411  		return
   412  	}
   413  	ciliumClient := epSync.Clientset.CiliumV2()
   414  
   415  	// The health endpoint doesn't really exist in k8s and updates to it caused
   416  	// arbitrary errors. Disable the controller for these endpoints.
   417  	if isHealthEP := e.HasLabels(pkgLabels.LabelHealth); isHealthEP {
   418  		scopedLog.Debug("Not starting unnecessary CEP controller for cilium-health endpoint")
   419  		return
   420  	}
   421  
   422  	// NOTE: The controller functions do NOT hold the endpoint locks
   423  	e.UpdateController(controllerName,
   424  		controller.ControllerParams{
   425  			Group: ciliumEndpointToK8sSyncControllerGroup,
   426  			StopFunc: func(ctx context.Context) error {
   427  				return deleteCEP(ctx, scopedLog, ciliumClient, e)
   428  			},
   429  		},
   430  	)
   431  }
   432  
   433  func deleteCEP(ctx context.Context, scopedLog *logrus.Entry, ciliumClient v2.CiliumV2Interface, e *endpoint.Endpoint) error {
   434  	cepName := e.GetK8sCEPName()
   435  	if cepName == "" {
   436  		scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no k8s cep name")
   437  		return nil
   438  	}
   439  
   440  	cepOwner := e.GetCEPOwner()
   441  	if cepOwner.IsNil() {
   442  		scopedLog.Debug("Skipping CiliumEndpoint deletion because owner is nil")
   443  		return nil
   444  	}
   445  
   446  	// A CEP should be only be deleted by the agent that manages the
   447  	// corresponding pod. However, it is possible for a pod to restart and be
   448  	// scheduled onto a different node while the agent on the original node was
   449  	// down, which would cause the CEP to be deleted once the original agent came
   450  	// back up. (This holds for StatefulSets in particular that come with stable
   451  	// pod identifiers and thus do not guard against such accidental deletes
   452  	// through unique pod names.) Storing the CEP UID at CEP create/fetch time
   453  	// and using it as a precondition for deletion ensures that agents may only
   454  	// delete CEPs they own.
   455  	// It is possible for the CEP UID to not be populated when an agent tries to
   456  	// clean up a CEP. In that case, skip deletion and rely on cilium operator
   457  	// garbage collection to clean up eventually.
   458  	cepUID := e.GetCiliumEndpointUID()
   459  	if cepUID == "" {
   460  		scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no UID")
   461  		return nil
   462  	}
   463  
   464  	scopedLog.WithField(logfields.CEPUID, cepUID).Debug("deleting CEP with UID")
   465  	if err := ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Delete(ctx, cepName, meta_v1.DeleteOptions{
   466  		Preconditions: &meta_v1.Preconditions{
   467  			UID: &cepUID,
   468  		},
   469  	}); err != nil {
   470  		if !k8serrors.IsNotFound(err) && !k8serrors.IsConflict(err) {
   471  			scopedLog.WithError(err).Warning("Unable to delete CEP")
   472  		}
   473  	}
   474  	return nil
   475  }