github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/endpointsynchronizer.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package endpointmanager 5 6 import ( 7 "context" 8 "encoding/json" 9 "errors" 10 "fmt" 11 12 "github.com/blang/semver/v4" 13 "github.com/cilium/hive/cell" 14 "github.com/sirupsen/logrus" 15 k8serrors "k8s.io/apimachinery/pkg/api/errors" 16 meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 k8stypes "k8s.io/apimachinery/pkg/types" 18 19 "github.com/cilium/cilium/pkg/controller" 20 "github.com/cilium/cilium/pkg/endpoint" 21 "github.com/cilium/cilium/pkg/k8s" 22 cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 23 "github.com/cilium/cilium/pkg/k8s/client" 24 v2 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2" 25 k8sversion "github.com/cilium/cilium/pkg/k8s/version" 26 pkgLabels "github.com/cilium/cilium/pkg/labels" 27 "github.com/cilium/cilium/pkg/logging/logfields" 28 "github.com/cilium/cilium/pkg/node" 29 "github.com/cilium/cilium/pkg/node/types" 30 "github.com/cilium/cilium/pkg/option" 31 "github.com/cilium/cilium/pkg/time" 32 ) 33 34 const ( 35 // subsysEndpointSync is the value for logfields.LogSubsys 36 subsysEndpointSync = "endpointsynchronizer" 37 ) 38 39 var ciliumEndpointToK8sSyncControllerGroup = controller.NewGroup("sync-to-k8s-ciliumendpoint") 40 41 // EndpointSynchronizer currently is an empty type, which wraps around syncing 42 // of CiliumEndpoint resources. 43 type EndpointSynchronizer struct { 44 Clientset client.Clientset 45 } 46 47 // RunK8sCiliumEndpointSync starts a controller that synchronizes the endpoint 48 // to the corresponding k8s CiliumEndpoint CRD. It is expected that each CEP 49 // has 1 controller that updates it, and a local copy is retained and only 50 // updates are pushed up. 51 // CiliumEndpoint objects have the same name as the pod they represent. 52 func (epSync *EndpointSynchronizer) RunK8sCiliumEndpointSync(e *endpoint.Endpoint, h cell.Health) { 53 var ( 54 endpointID = e.ID 55 controllerName = endpoint.EndpointSyncControllerName(endpointID) 56 scopedLog = e.Logger(subsysEndpointSync).WithFields(logrus.Fields{ 57 "controller": controllerName, 58 "endpointID": e.ID, 59 }) 60 ) 61 62 if option.Config.DisableCiliumEndpointCRD { 63 h.Stopped("ciliumendpoint CRD disabled") 64 scopedLog.Debug("Not running controller. CEP CRD synchronization is disabled") 65 return 66 } 67 68 if !epSync.Clientset.IsEnabled() { 69 h.Stopped("k8s client-set disabled") 70 scopedLog.Debug("Not starting controller because k8s is disabled") 71 return 72 } 73 74 ciliumClient := epSync.Clientset.CiliumV2() 75 76 // The health endpoint doesn't really exist in k8s and updates to it caused 77 // arbitrary errors. Disable the controller for these endpoints. 78 if isHealthEP := e.HasLabels(pkgLabels.LabelHealth); isHealthEP { 79 h.Stopped("Cilium health endpoint has no CEP object for k8s sync") 80 scopedLog.Debug("Not starting unnecessary CEP controller for cilium-health endpoint") 81 return 82 } 83 84 // The CEP name is derived from the K8sPodName and K8sNamespace. 85 // They should always be available if an endpoint belongs to a pod. 86 cepName := e.GetK8sCEPName() 87 if cepName == "" { 88 h.Stopped("Endpoint synchronizer stopped due to missing CEP metadata") 89 scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s cep name") 90 return 91 } 92 93 var ( 94 lastMdl *cilium_v2.EndpointStatus 95 localCEP *cilium_v2.CiliumEndpoint // the local copy of the CEP object. Reused. 96 needInit = true // needInit indicates that we may need to create the CEP 97 firstTry = true // Try to get CEP from k8s cache 98 ) 99 100 // NOTE: The controller functions do NOT hold the endpoint locks 101 e.UpdateController(controllerName, 102 controller.ControllerParams{ 103 Group: ciliumEndpointToK8sSyncControllerGroup, 104 RunInterval: 10 * time.Second, 105 Health: h, 106 DoFunc: func(ctx context.Context) (err error) { 107 // Update logger as scopeLog might not have the podName when it 108 // was created. 109 scopedLog = e.Logger(subsysEndpointSync).WithField("controller", controllerName) 110 111 if k8sversion.Version().Equals(semver.Version{}) { 112 return fmt.Errorf("Kubernetes apiserver is not available") 113 } 114 115 cepOwner := e.GetCEPOwner() 116 if cepOwner.IsNil() { 117 scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s namespace") 118 return nil 119 } 120 121 if !e.HaveK8sMetadata() { 122 scopedLog.Debug("Skipping CiliumEndpoint update because k8s metadata is not yet available") 123 return nil 124 } 125 126 identity, err := e.GetSecurityIdentity() 127 if err != nil { 128 return err 129 } 130 if identity == nil { 131 scopedLog.Debug("Skipping CiliumEndpoint update because security identity is not yet available") 132 return nil 133 } 134 135 // Serialize the endpoint into a model. It is compared with the one 136 // from before, only updating on changes. 137 mdl := e.GetCiliumEndpointStatus() 138 if !needInit && mdl.DeepEqual(lastMdl) { 139 scopedLog.Debug("Skipping CiliumEndpoint update because it has not changed") 140 return nil 141 } 142 143 if needInit { 144 state := e.GetState() 145 // Don't bother to create if the 146 // endpoint is already disconnecting 147 if state == endpoint.StateDisconnecting || 148 state == endpoint.StateDisconnected { 149 return nil 150 } 151 152 scopedLog.Debug("Getting CEP during an initialization") 153 if firstTry { 154 // First we try getting CEP from the API server cache, as it's cheaper. 155 // If it fails we get it from etcd to be sure to have fresh data. 156 localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{ResourceVersion: "0"}) 157 firstTry = false 158 } else { 159 localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{}) 160 } 161 // It's only an error if it exists but something else happened 162 switch { 163 case err == nil: 164 // Backfill the CEP UID as we need to do if the CEP was 165 // created on an agent version that did not yet store the 166 // UID at CEP create time. 167 if err := updateCEPUID(scopedLog, e, localCEP); err != nil { 168 scopedLog.WithError(err).Warn("could not take ownership of existing ciliumendpoint") 169 return err 170 } 171 case k8serrors.IsNotFound(err): 172 // We can't create localCEP directly, it must come from the k8s 173 // server via an API call. 174 cep := &cilium_v2.CiliumEndpoint{ 175 ObjectMeta: meta_v1.ObjectMeta{ 176 Name: cepName, 177 OwnerReferences: []meta_v1.OwnerReference{ 178 { 179 APIVersion: cepOwner.GetAPIVersion(), 180 Kind: cepOwner.GetKind(), 181 Name: cepOwner.GetName(), 182 UID: cepOwner.GetUID(), 183 }, 184 }, 185 // Mirror the labels of parent pod in CiliumEndpoint object to enable 186 // label based selection for CiliumEndpoints. 187 Labels: cepOwner.GetLabels(), 188 }, 189 Status: *mdl, 190 } 191 localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Create(ctx, cep, meta_v1.CreateOptions{}) 192 if err != nil { 193 // Suppress logging an error if ep backing the pod was terminated 194 // before CEP could be created and shut down the controller. 195 if errors.Is(err, context.Canceled) { 196 return nil 197 } 198 199 scopedLog.WithError(err).Error("Cannot create CEP") 200 return err 201 } 202 203 scopedLog.WithField(logfields.CEPUID, localCEP.UID).Debug("storing CEP UID after create") 204 e.SetCiliumEndpointUID(localCEP.UID) 205 206 // continue the execution so we update the endpoint 207 // status immediately upon endpoint creation 208 default: 209 scopedLog.WithError(err).Warn("Error getting CEP") 210 return err 211 } 212 213 // We return earlier for all error cases so we don't need 214 // to init the local endpoint in non-error cases. 215 needInit = false 216 lastMdl = &localCEP.Status 217 // We still need to update the CEP if localCEP is out of sync with upstream. 218 // We only return if upstream is NOT out-of-sync here. 219 if mdl.DeepEqual(lastMdl) { 220 scopedLog.Debug("Skipping CiliumEndpoint update because it has not changed") 221 return nil 222 } 223 } 224 // We have no localCEP copy. We need to fetch it for updates, below. 225 // This is unexpected as there should be only 1 writer per CEP, this 226 // controller, and the localCEP created on startup will be used. 227 if localCEP == nil { 228 localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Get(ctx, cepName, meta_v1.GetOptions{}) 229 switch { 230 case err == nil: 231 // Backfill the CEP UID as we need to do if the CEP was 232 // created on an agent version that did not yet store the 233 // UID at CEP create time. 234 if err := updateCEPUID(scopedLog, e, localCEP); err != nil { 235 scopedLog.WithError(err).Warn("could not take ownership of existing ciliumendpoint") 236 return err 237 } 238 239 // The CEP doesn't exist in k8s. This is unexpetected but may occur 240 // if the endpoint was removed from k8s but not yet within the agent. 241 // Mark the CEP for creation on the next controller iteration. This 242 // may never occur if the controller is stopped on Endpoint delete. 243 case k8serrors.IsNotFound(err): 244 needInit = true 245 return err 246 247 // We cannot read the upstream CEP. needInit will cause the next 248 // iteration to delete and create the CEP. This is an unexpected 249 // situation. 250 case k8serrors.IsInvalid(err): 251 scopedLog.WithError(err).Warn("Invalid CEP during update") 252 needInit = true 253 return nil 254 255 // A real error 256 default: 257 scopedLog.WithError(err).Error("Cannot get CEP during update") 258 return err 259 } 260 } 261 262 // For json patch we don't need to perform a GET for endpoints 263 264 // If it fails it means the test from the previous patch failed 265 // so we can safely replace this node in the CNP status. 266 replaceCEPStatus := []k8s.JSONPatch{ 267 // If the stored UID matches the one in the ciliumendpoint then 268 // this first patch is a no-op, otherwise the entire patch will 269 // not be applied as uid is immutable. 270 { 271 OP: "test", 272 Path: "/metadata/uid", 273 Value: e.GetCiliumEndpointUID(), 274 }, 275 { 276 OP: "replace", 277 Path: "/status", 278 Value: mdl, 279 }, 280 } 281 var createStatusPatch []byte 282 createStatusPatch, err = json.Marshal(replaceCEPStatus) 283 if err != nil { 284 return err 285 } 286 287 localCEP, err = ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Patch( 288 ctx, cepName, 289 k8stypes.JSONPatchType, 290 createStatusPatch, 291 meta_v1.PatchOptions{}) 292 293 // Handle Update errors or return successfully 294 switch { 295 // Return no error when we see a conflict. We want to retry without a 296 // backoff and the Update* calls returned the current localCEP 297 case err != nil && k8serrors.IsConflict(err): 298 scopedLog.WithError(err).Warn("Cannot update CEP due to a revision conflict. The next controller execution will try again") 299 needInit = true 300 return nil 301 302 // Ensure we re-init when we see a generic error. This will recrate the 303 // CEP. 304 case err != nil: 305 // Suppress logging an error if ep backing the pod was terminated 306 // before CEP could be updated and shut down the controller. 307 if errors.Is(err, context.Canceled) { 308 return nil 309 } 310 scopedLog.WithError(err).Error("Cannot update CEP") 311 312 needInit = true 313 return err 314 315 // A successful update means no more updates unless the endpoint status, aka mdl, changes 316 default: 317 lastMdl = mdl 318 return nil 319 } 320 }, 321 StopFunc: func(ctx context.Context) error { 322 return deleteCEP(ctx, scopedLog, ciliumClient, e) 323 }, 324 }) 325 } 326 327 // updateCEPUID attempts to update the endpoints UID to be that of localCEP. 328 // This in effect takes ownership of the referenced CEP, thus we can only 329 // do this if it is safe to do so. Otherwise an error is returned. 330 // 331 // One caveat is that, although endpoints are now restored to reference their 332 // previous CEP, this has to handle cases where agent was upgraded from a version 333 // that did not store CEP UIDs in the restore state header. 334 // It is only safe to do so if the CEP is local. 335 // 336 // In all cases where the endpoint cannot take ownership of a CEP, it is assumed 337 // that this is a temporary state where either the local/remote agent managing the CEP 338 // is shutting down and will delete the CEP, or the CEP is stale and needs to be cleaned 339 // up by the operator. 340 func updateCEPUID(scopedLog *logrus.Entry, e *endpoint.Endpoint, localCEP *cilium_v2.CiliumEndpoint) error { 341 // It's possible we already own this CEP, as in the case of a restore after restart. 342 // If the Endpoint already owns the CEP (by holding the matching CEP UID reference) then we don't have to 343 // worry about other ownership checks. 344 // 345 // This will cover cases such as if the NodeIP changes (as with a reboot). 346 // In which case we can safely take ownership and overwrite the CEPs 347 // status. However if the cilium endpoints are lost on restart (eg the 348 // state files were previously checkpointed into tmpfs) this check will 349 // fail and we will rely on the next check to prevent us from hijacking 350 // CEPs. 351 cepUID := e.GetCiliumEndpointUID() 352 if cepUID == localCEP.UID { 353 return nil 354 } 355 356 // We do not want to take ownership of CEPs created on other Nodes. 357 // However we can't directly compare the CEP node ip with the node, because 358 // the node ip can change, orphaning the CEP. So we retrieve the pod for 359 // the CEP and compare its node IP with that of the node. The kubelet on 360 // this node will update the pod object appropriately, allowing this check 361 // to eventually go through. 362 // 363 // The intent here is to check if a given pod is running on the same node 364 // this cilium is running on before taking over its CEP. 365 cepOwner := e.GetCEPOwner() 366 if cepOwner.IsNil() { 367 return fmt.Errorf("endpoint sync cannot take ownership of CEP: no pod") 368 } 369 podHostIP := cepOwner.GetHostIP() 370 if podHostIP == "" { 371 return fmt.Errorf("endpoint sync cannot take ownership of CEP: no pod HostIP") 372 } 373 if nodeIP := node.GetIPv4().String(); podHostIP != nodeIP { 374 // Also checking node ipv6 for k8s dual stack with ipv6 preference where 375 // podHostIP is gonna be node ipv6 376 if nodeIPV6 := node.GetIPv6().String(); podHostIP != nodeIPV6 { 377 return fmt.Errorf("endpoint sync cannot take ownership of CEP that is not local: CEP's pod %q, pod's hostIP %q, cilium nodeIP %q)", 378 e.GetK8sPodName(), podHostIP, nodeIP) 379 } 380 } 381 382 // If the endpoint has a CEP UID, which does not match the current CEP, we cannot take 383 // ownership. 384 if cepUID != "" && cepUID != localCEP.GetUID() { 385 return fmt.Errorf("endpoint sync could not take ownership of CEP %q, endpoint UID (%q) did not match CEP UID: %q", 386 localCEP.GetNamespace()+"/"+localCEP.GetName(), cepUID, localCEP.GetUID()) 387 } 388 389 if cepUID := e.GetCiliumEndpointUID(); cepUID == "" { 390 scopedLog.WithFields(logrus.Fields{ 391 logfields.Node: types.GetName(), 392 "old" + logfields.CEPUID: cepUID, 393 logfields.CEPUID: localCEP.UID, 394 }).Debug("updating CEP UID and syncing endpoint header file") 395 e.SetCiliumEndpointUID(localCEP.UID) 396 e.SyncEndpointHeaderFile() 397 } 398 return nil 399 } 400 401 // DeleteK8sCiliumEndpointSync replaces the endpoint controller to remove the 402 // CEP from Kubernetes once the endpoint is stopped / removed from the 403 // Cilium agent. 404 func (epSync *EndpointSynchronizer) DeleteK8sCiliumEndpointSync(e *endpoint.Endpoint) { 405 controllerName := endpoint.EndpointSyncControllerName(e.ID) 406 407 scopedLog := e.Logger(subsysEndpointSync).WithField("controller", controllerName) 408 409 if !epSync.Clientset.IsEnabled() { 410 scopedLog.Debug("Not starting controller because k8s is disabled") 411 return 412 } 413 ciliumClient := epSync.Clientset.CiliumV2() 414 415 // The health endpoint doesn't really exist in k8s and updates to it caused 416 // arbitrary errors. Disable the controller for these endpoints. 417 if isHealthEP := e.HasLabels(pkgLabels.LabelHealth); isHealthEP { 418 scopedLog.Debug("Not starting unnecessary CEP controller for cilium-health endpoint") 419 return 420 } 421 422 // NOTE: The controller functions do NOT hold the endpoint locks 423 e.UpdateController(controllerName, 424 controller.ControllerParams{ 425 Group: ciliumEndpointToK8sSyncControllerGroup, 426 StopFunc: func(ctx context.Context) error { 427 return deleteCEP(ctx, scopedLog, ciliumClient, e) 428 }, 429 }, 430 ) 431 } 432 433 func deleteCEP(ctx context.Context, scopedLog *logrus.Entry, ciliumClient v2.CiliumV2Interface, e *endpoint.Endpoint) error { 434 cepName := e.GetK8sCEPName() 435 if cepName == "" { 436 scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no k8s cep name") 437 return nil 438 } 439 440 cepOwner := e.GetCEPOwner() 441 if cepOwner.IsNil() { 442 scopedLog.Debug("Skipping CiliumEndpoint deletion because owner is nil") 443 return nil 444 } 445 446 // A CEP should be only be deleted by the agent that manages the 447 // corresponding pod. However, it is possible for a pod to restart and be 448 // scheduled onto a different node while the agent on the original node was 449 // down, which would cause the CEP to be deleted once the original agent came 450 // back up. (This holds for StatefulSets in particular that come with stable 451 // pod identifiers and thus do not guard against such accidental deletes 452 // through unique pod names.) Storing the CEP UID at CEP create/fetch time 453 // and using it as a precondition for deletion ensures that agents may only 454 // delete CEPs they own. 455 // It is possible for the CEP UID to not be populated when an agent tries to 456 // clean up a CEP. In that case, skip deletion and rely on cilium operator 457 // garbage collection to clean up eventually. 458 cepUID := e.GetCiliumEndpointUID() 459 if cepUID == "" { 460 scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no UID") 461 return nil 462 } 463 464 scopedLog.WithField(logfields.CEPUID, cepUID).Debug("deleting CEP with UID") 465 if err := ciliumClient.CiliumEndpoints(cepOwner.GetNamespace()).Delete(ctx, cepName, meta_v1.DeleteOptions{ 466 Preconditions: &meta_v1.Preconditions{ 467 UID: &cepUID, 468 }, 469 }); err != nil { 470 if !k8serrors.IsNotFound(err) && !k8serrors.IsConflict(err) { 471 scopedLog.WithError(err).Warning("Unable to delete CEP") 472 } 473 } 474 return nil 475 }